blob: 1e23d26aef7432e8cb47f6a8127912cb2751f67f [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Nick Wellnhofer855c19e2017-06-01 01:04:08 +020046#include <stddef.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000048#include <libxml/threads.h>
49#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000050#include <libxml/tree.h>
51#include <libxml/parser.h>
52#include <libxml/parserInternals.h>
53#include <libxml/valid.h>
54#include <libxml/entities.h>
55#include <libxml/xmlerror.h>
56#include <libxml/encoding.h>
57#include <libxml/xmlIO.h>
58#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000059#ifdef LIBXML_CATALOG_ENABLED
60#include <libxml/catalog.h>
61#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000062#ifdef LIBXML_SCHEMAS_ENABLED
63#include <libxml/xmlschemastypes.h>
64#include <libxml/relaxng.h>
65#endif
Owen Taylor3473f882001-02-23 17:55:21 +000066#ifdef HAVE_CTYPE_H
67#include <ctype.h>
68#endif
69#ifdef HAVE_STDLIB_H
70#include <stdlib.h>
71#endif
72#ifdef HAVE_SYS_STAT_H
73#include <sys/stat.h>
74#endif
75#ifdef HAVE_FCNTL_H
76#include <fcntl.h>
77#endif
78#ifdef HAVE_UNISTD_H
79#include <unistd.h>
80#endif
81#ifdef HAVE_ZLIB_H
82#include <zlib.h>
83#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020084#ifdef HAVE_LZMA_H
85#include <lzma.h>
86#endif
Owen Taylor3473f882001-02-23 17:55:21 +000087
Daniel Veillard768eb3b2012-07-16 14:19:49 +080088#include "buf.h"
89#include "enc.h"
90
Daniel Veillard0161e632008-08-28 15:36:32 +000091static void
92xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
93
Rob Richards9c0aa472009-03-26 18:10:19 +000094static xmlParserCtxtPtr
95xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
96 const xmlChar *base, xmlParserCtxtPtr pctx);
97
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080098static void xmlHaltParser(xmlParserCtxtPtr ctxt);
99
Daniel Veillard0161e632008-08-28 15:36:32 +0000100/************************************************************************
101 * *
102 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
103 * *
104 ************************************************************************/
105
106#define XML_PARSER_BIG_ENTITY 1000
107#define XML_PARSER_LOT_ENTITY 5000
108
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
115#define XML_PARSER_NON_LINEAR 10
116
117/*
118 * xmlParserEntityCheck
119 *
120 * Function to check non-linear entity expansion behaviour
121 * This is here to detect and stop exponential linear entity expansion
122 * This is not a limitation of the parser but a safety
123 * boundary feature. It can be disabled with the XML_PARSE_HUGE
124 * parser option.
125 */
126static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800127xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800128 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000129{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800130 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000131
132 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
133 return (0);
134 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
135 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800136
137 /*
138 * This may look absurd but is needed to detect
139 * entities problems
140 */
141 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillardbdd66182016-05-23 12:27:58 +0800142 (ent->content != NULL) && (ent->checked == 0) &&
143 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800144 unsigned long oldnbent = ctxt->nbentities;
145 xmlChar *rep;
146
147 ent->checked = 1;
148
Peter Simons8f30bdf2016-04-15 11:56:55 +0200149 ++ctxt->depth;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800150 rep = xmlStringDecodeEntities(ctxt, ent->content,
151 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +0200152 --ctxt->depth;
Daniel Veillardbdd66182016-05-23 12:27:58 +0800153 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
154 ent->content[0] = 0;
155 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800156
157 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
158 if (rep != NULL) {
159 if (xmlStrchr(rep, '<'))
160 ent->checked |= 1;
161 xmlFree(rep);
162 rep = NULL;
163 }
164 }
Daniel Veillard23f05e02013-02-19 10:21:49 +0800165 if (replacement != 0) {
166 if (replacement < XML_MAX_TEXT_LENGTH)
167 return(0);
168
169 /*
170 * If the volume of entity copy reaches 10 times the
171 * amount of parsed data and over the large text threshold
172 * then that's very likely to be an abuse.
173 */
174 if (ctxt->input != NULL) {
175 consumed = ctxt->input->consumed +
176 (ctxt->input->cur - ctxt->input->base);
177 }
178 consumed += ctxt->sizeentities;
179
180 if (replacement < XML_PARSER_NON_LINEAR * consumed)
181 return(0);
182 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000183 /*
184 * Do the check based on the replacement size of the entity
185 */
186 if (size < XML_PARSER_BIG_ENTITY)
187 return(0);
188
189 /*
190 * A limit on the amount of text data reasonably used
191 */
192 if (ctxt->input != NULL) {
193 consumed = ctxt->input->consumed +
194 (ctxt->input->cur - ctxt->input->base);
195 }
196 consumed += ctxt->sizeentities;
197
198 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
199 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
200 return (0);
201 } else if (ent != NULL) {
202 /*
203 * use the number of parsed entities in the replacement
204 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800205 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000206
207 /*
208 * The amount of data parsed counting entities size only once
209 */
210 if (ctxt->input != NULL) {
211 consumed = ctxt->input->consumed +
212 (ctxt->input->cur - ctxt->input->base);
213 }
214 consumed += ctxt->sizeentities;
215
216 /*
217 * Check the density of entities for the amount of data
218 * knowing an entity reference will take at least 3 bytes
219 */
220 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
221 return (0);
222 } else {
223 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800224 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000225 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800226 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
227 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
228 (ctxt->nbentities <= 10000))
229 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000230 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000231 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
232 return (1);
233}
234
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary ceiling on the depth of the XML documents that we allow
 * to process. It is a safety boundary rather than a limitation of the
 * parser, and it can be disabled with the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256U;
Owen Taylor3473f882001-02-23 17:55:21 +0000244
Daniel Veillard0fb18932003-09-07 09:14:37 +0000245
Daniel Veillard0161e632008-08-28 15:36:32 +0000246
247#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000248#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000249#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000250#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
251
Daniel Veillard1f972e92012-08-15 10:16:37 +0800252/**
253 * XML_PARSER_CHUNK_SIZE
254 *
255 * When calling GROW that's the minimal amount of data
256 * the parser expected to have received. It is not a hard
257 * limit but an optimization when reading strings like Names
258 * It is not strictly needed as long as inputs available characters
259 * are followed by 0, which should be provided by the I/O level
260 */
261#define XML_PARSER_CHUNK_SIZE 100
262
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is read-only and terminated by a NULL entry.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
272
Daniel Veillarda07050d2003-10-19 14:46:32 +0000273
Owen Taylor3473f882001-02-23 17:55:21 +0000274/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200275static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
276 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000277
Daniel Veillard7d515752003-09-26 19:12:37 +0000278static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000279xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
280 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000281 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000282 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000283
Daniel Veillard37334572008-07-31 08:20:02 +0000284static int
285xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
286 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000287#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000288static void
289xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
290 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000291#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000292
Daniel Veillard7d515752003-09-26 19:12:37 +0000293static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000294xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
295 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000296
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000297static int
298xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
299
Daniel Veillarde57ec792003-09-10 10:50:59 +0000300/************************************************************************
301 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800302 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 * *
304 ************************************************************************/
305
306/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000307 * xmlErrAttributeDup:
308 * @ctxt: an XML parser context
309 * @prefix: the attribute prefix
310 * @localname: the attribute localname
311 *
312 * Handle a redefinition of attribute error
313 */
314static void
315xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
316 const xmlChar * localname)
317{
Daniel Veillard157fee02003-10-31 10:36:03 +0000318 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
319 (ctxt->instate == XML_PARSER_EOF))
320 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000321 if (ctxt != NULL)
322 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200323
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000324 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000325 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200326 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 (const char *) localname, NULL, NULL, 0, 0,
328 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000329 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000330 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200331 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000332 (const char *) prefix, (const char *) localname,
333 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
334 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000335 if (ctxt != NULL) {
336 ctxt->wellFormed = 0;
337 if (ctxt->recovery == 0)
338 ctxt->disableSAX = 1;
339 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340}
341
342/**
343 * xmlFatalErr:
344 * @ctxt: an XML parser context
345 * @error: the error number
346 * @extra: extra information string
347 *
348 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
349 */
350static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352{
353 const char *errmsg;
354
Daniel Veillard157fee02003-10-31 10:36:03 +0000355 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
356 (ctxt->instate == XML_PARSER_EOF))
357 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 switch (error) {
359 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800360 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000362 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800363 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000365 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800366 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000368 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 errmsg = "internal error";
370 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000371 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800372 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000373 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000374 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800375 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000377 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800378 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000379 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000380 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800381 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000382 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000383 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800384 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000386 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800387 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000389 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800390 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000392 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800393 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000394 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000395 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800396 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000397 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000398 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800399 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000400 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000401 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800402 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000403 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000404 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800405 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000406 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000407 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800408 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000409 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000410 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800411 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000412 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000413 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800414 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000415 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000416 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800417 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000418 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000419 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800420 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000421 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000422 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800423 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000424 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000425 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800426 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800429 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800432 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800435 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 errmsg = "Fragment not allowed";
439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800441 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800444 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800447 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800450 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800453 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000454 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000455 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800456 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800459 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000460 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000461 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000462 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800463 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800466 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000468 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800469 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800472 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000473 break;
474 case XML_ERR_CONDSEC_INVALID_KEYWORD:
475 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800476 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000477 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000478 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800479 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000480 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000481 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800482 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000483 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000484 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800485 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000486 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000487 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800488 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000489 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000490 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800491 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000492 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000493 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800494 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000495 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000496 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800497 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000498 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000499 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800500 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000501 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000502 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800503 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000504 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000505 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800506 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000507 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000508 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800509 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000510 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000511 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800512 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000513 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000514 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800515 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000516 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000517 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800518 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000520 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800521 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000522 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000523 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800524 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000525 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000526 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800527 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000528 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800529 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800530 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800531 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000532#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000533 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800534 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000535 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000536#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000537 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800538 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000539 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000540 if (ctxt != NULL)
541 ctxt->errNo = error;
David Kilzer4472c3a2016-05-13 15:13:17 +0800542 if (info == NULL) {
543 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
544 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
545 errmsg);
546 } else {
547 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
548 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
549 errmsg, info);
550 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000551 if (ctxt != NULL) {
552 ctxt->wellFormed = 0;
553 if (ctxt->recovery == 0)
554 ctxt->disableSAX = 1;
555 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000556}
557
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000558/**
559 * xmlFatalErrMsg:
560 * @ctxt: an XML parser context
561 * @error: the error number
562 * @msg: the error message
563 *
564 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
565 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800566static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000567xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000569{
Daniel Veillard157fee02003-10-31 10:36:03 +0000570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 if (ctxt != NULL)
574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200576 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000577 if (ctxt != NULL) {
578 ctxt->wellFormed = 0;
579 if (ctxt->recovery == 0)
580 ctxt->disableSAX = 1;
581 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582}
583
584/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000585 * xmlWarningMsg:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the error message
589 * @str1: extra data
590 * @str2: extra data
591 *
592 * Handle a warning.
593 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800594static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000595xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
596 const char *msg, const xmlChar *str1, const xmlChar *str2)
597{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000598 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000599
Daniel Veillard157fee02003-10-31 10:36:03 +0000600 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
601 (ctxt->instate == XML_PARSER_EOF))
602 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000603 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
604 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000605 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200606 if (ctxt != NULL) {
607 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000608 (ctxt->sax) ? ctxt->sax->warning : NULL,
609 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000610 ctxt, NULL, XML_FROM_PARSER, error,
611 XML_ERR_WARNING, NULL, 0,
612 (const char *) str1, (const char *) str2, NULL, 0, 0,
613 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200614 } else {
615 __xmlRaiseError(schannel, NULL, NULL,
616 ctxt, NULL, XML_FROM_PARSER, error,
617 XML_ERR_WARNING, NULL, 0,
618 (const char *) str1, (const char *) str2, NULL, 0, 0,
619 msg, (const char *) str1, (const char *) str2);
620 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000621}
622
623/**
624 * xmlValidityError:
625 * @ctxt: an XML parser context
626 * @error: the error number
627 * @msg: the error message
628 * @str1: extra data
629 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000630 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000631 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800632static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000633xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000634 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000635{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000636 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000637
638 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
639 (ctxt->instate == XML_PARSER_EOF))
640 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000641 if (ctxt != NULL) {
642 ctxt->errNo = error;
643 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
644 schannel = ctxt->sax->serror;
645 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200646 if (ctxt != NULL) {
647 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000648 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000649 ctxt, NULL, XML_FROM_DTD, error,
650 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000651 (const char *) str2, NULL, 0, 0,
652 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000653 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200654 } else {
655 __xmlRaiseError(schannel, NULL, NULL,
656 ctxt, NULL, XML_FROM_DTD, error,
657 XML_ERR_ERROR, NULL, 0, (const char *) str1,
658 (const char *) str2, NULL, 0, 0,
659 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000660 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000661}
662
663/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000664 * xmlFatalErrMsgInt:
665 * @ctxt: an XML parser context
666 * @error: the error number
667 * @msg: the error message
668 * @val: an integer value
669 *
670 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
671 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800672static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000673xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000674 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000675{
Daniel Veillard157fee02003-10-31 10:36:03 +0000676 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
677 (ctxt->instate == XML_PARSER_EOF))
678 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000679 if (ctxt != NULL)
680 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000681 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000682 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
683 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000684 if (ctxt != NULL) {
685 ctxt->wellFormed = 0;
686 if (ctxt->recovery == 0)
687 ctxt->disableSAX = 1;
688 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000689}
690
691/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000692 * xmlFatalErrMsgStrIntStr:
693 * @ctxt: an XML parser context
694 * @error: the error number
695 * @msg: the error message
696 * @str1: an string info
697 * @val: an integer value
698 * @str2: an string info
699 *
700 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
701 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800702static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000703xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800704 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000705 const xmlChar *str2)
706{
Daniel Veillard157fee02003-10-31 10:36:03 +0000707 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
708 (ctxt->instate == XML_PARSER_EOF))
709 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000710 if (ctxt != NULL)
711 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000712 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000713 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
714 NULL, 0, (const char *) str1, (const char *) str2,
715 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000716 if (ctxt != NULL) {
717 ctxt->wellFormed = 0;
718 if (ctxt->recovery == 0)
719 ctxt->disableSAX = 1;
720 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000721}
722
723/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000724 * xmlFatalErrMsgStr:
725 * @ctxt: an XML parser context
726 * @error: the error number
727 * @msg: the error message
728 * @val: a string value
729 *
730 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
731 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800732static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000733xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000734 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000735{
Daniel Veillard157fee02003-10-31 10:36:03 +0000736 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
737 (ctxt->instate == XML_PARSER_EOF))
738 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000739 if (ctxt != NULL)
740 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000741 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000742 XML_FROM_PARSER, error, XML_ERR_FATAL,
743 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
744 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000745 if (ctxt != NULL) {
746 ctxt->wellFormed = 0;
747 if (ctxt->recovery == 0)
748 ctxt->disableSAX = 1;
749 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000750}
751
752/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000753 * xmlErrMsgStr:
754 * @ctxt: an XML parser context
755 * @error: the error number
756 * @msg: the error message
757 * @val: a string value
758 *
759 * Handle a non fatal parser error
760 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800761static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000762xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
763 const char *msg, const xmlChar * val)
764{
Daniel Veillard157fee02003-10-31 10:36:03 +0000765 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
766 (ctxt->instate == XML_PARSER_EOF))
767 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000768 if (ctxt != NULL)
769 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000770 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000771 XML_FROM_PARSER, error, XML_ERR_ERROR,
772 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
773 val);
774}
775
776/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000777 * xmlNsErr:
778 * @ctxt: an XML parser context
779 * @error: the error number
780 * @msg: the message
781 * @info1: extra information string
782 * @info2: extra information string
783 *
784 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
785 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800786static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000787xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
788 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000789 const xmlChar * info1, const xmlChar * info2,
790 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000791{
Daniel Veillard157fee02003-10-31 10:36:03 +0000792 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
793 (ctxt->instate == XML_PARSER_EOF))
794 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000795 if (ctxt != NULL)
796 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000797 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000798 XML_ERR_ERROR, NULL, 0, (const char *) info1,
799 (const char *) info2, (const char *) info3, 0, 0, msg,
800 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000801 if (ctxt != NULL)
802 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000803}
804
Daniel Veillard37334572008-07-31 08:20:02 +0000805/**
806 * xmlNsWarn
807 * @ctxt: an XML parser context
808 * @error: the error number
809 * @msg: the message
810 * @info1: extra information string
811 * @info2: extra information string
812 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800813 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000814 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800815static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard37334572008-07-31 08:20:02 +0000816xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
817 const char *msg,
818 const xmlChar * info1, const xmlChar * info2,
819 const xmlChar * info3)
820{
821 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
822 (ctxt->instate == XML_PARSER_EOF))
823 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000824 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
825 XML_ERR_WARNING, NULL, 0, (const char *) info1,
826 (const char *) info2, (const char *) info3, 0, 0, msg,
827 info1, info2, info3);
828}
829
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000830/************************************************************************
831 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800832 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833 * *
834 ************************************************************************/
835
836/**
837 * xmlHasFeature:
838 * @feature: the feature to be examined
839 *
840 * Examines if the library has been compiled with a given feature.
841 *
842 * Returns a non-zero value if the feature exist, otherwise zero.
843 * Returns zero (0) if the feature does not exist or an unknown
844 * unknown feature is requested, non-zero otherwise.
845 */
846int
847xmlHasFeature(xmlFeature feature)
848{
849 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000850 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000851#ifdef LIBXML_THREAD_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000856 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000857#ifdef LIBXML_TREE_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000862 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000863#ifdef LIBXML_OUTPUT_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000868 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000869#ifdef LIBXML_PUSH_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000874 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000875#ifdef LIBXML_READER_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000880 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000881#ifdef LIBXML_PATTERN_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000886 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000887#ifdef LIBXML_WRITER_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000892 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000893#ifdef LIBXML_SAX1_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000898 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000899#ifdef LIBXML_FTP_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000904 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000905#ifdef LIBXML_HTTP_ENABLED
906 return(1);
907#else
908 return(0);
909#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000910 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000911#ifdef LIBXML_VALID_ENABLED
912 return(1);
913#else
914 return(0);
915#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000916 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000917#ifdef LIBXML_HTML_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000922 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000923#ifdef LIBXML_LEGACY_ENABLED
924 return(1);
925#else
926 return(0);
927#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000928 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000929#ifdef LIBXML_C14N_ENABLED
930 return(1);
931#else
932 return(0);
933#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000934 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000935#ifdef LIBXML_CATALOG_ENABLED
936 return(1);
937#else
938 return(0);
939#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000940 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000941#ifdef LIBXML_XPATH_ENABLED
942 return(1);
943#else
944 return(0);
945#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000946 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000947#ifdef LIBXML_XPTR_ENABLED
948 return(1);
949#else
950 return(0);
951#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000952 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000953#ifdef LIBXML_XINCLUDE_ENABLED
954 return(1);
955#else
956 return(0);
957#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000958 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000959#ifdef LIBXML_ICONV_ENABLED
960 return(1);
961#else
962 return(0);
963#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000964 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000965#ifdef LIBXML_ISO8859X_ENABLED
966 return(1);
967#else
968 return(0);
969#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000970 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000971#ifdef LIBXML_UNICODE_ENABLED
972 return(1);
973#else
974 return(0);
975#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000976 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000977#ifdef LIBXML_REGEXP_ENABLED
978 return(1);
979#else
980 return(0);
981#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000982 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000983#ifdef LIBXML_AUTOMATA_ENABLED
984 return(1);
985#else
986 return(0);
987#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000988 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000989#ifdef LIBXML_EXPR_ENABLED
990 return(1);
991#else
992 return(0);
993#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000994 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000995#ifdef LIBXML_SCHEMAS_ENABLED
996 return(1);
997#else
998 return(0);
999#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001000 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001001#ifdef LIBXML_SCHEMATRON_ENABLED
1002 return(1);
1003#else
1004 return(0);
1005#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001006 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001007#ifdef LIBXML_MODULES_ENABLED
1008 return(1);
1009#else
1010 return(0);
1011#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001012 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001013#ifdef LIBXML_DEBUG_ENABLED
1014 return(1);
1015#else
1016 return(0);
1017#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001018 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001019#ifdef DEBUG_MEMORY_LOCATION
1020 return(1);
1021#else
1022 return(0);
1023#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001024 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001025#ifdef LIBXML_DEBUG_RUNTIME
1026 return(1);
1027#else
1028 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001029#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001030 case XML_WITH_ZLIB:
1031#ifdef LIBXML_ZLIB_ENABLED
1032 return(1);
1033#else
1034 return(0);
1035#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001036 case XML_WITH_LZMA:
1037#ifdef LIBXML_LZMA_ENABLED
1038 return(1);
1039#else
1040 return(0);
1041#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001042 case XML_WITH_ICU:
1043#ifdef LIBXML_ICU_ENABLED
1044 return(1);
1045#else
1046 return(0);
1047#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001048 default:
1049 break;
1050 }
1051 return(0);
1052}
1053
1054/************************************************************************
1055 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001056 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001057 * *
1058 ************************************************************************/
1059
1060/**
1061 * xmlDetectSAX2:
1062 * @ctxt: an XML parser context
1063 *
1064 * Do the SAX2 detection and specific intialization
1065 */
1066static void
1067xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1068 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001069#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001070 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1071 ((ctxt->sax->startElementNs != NULL) ||
1072 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001073#else
1074 ctxt->sax2 = 1;
1075#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001076
1077 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1078 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1079 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001080 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1081 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001082 xmlErrMemory(ctxt, NULL);
1083 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001084}
1085
Daniel Veillarde57ec792003-09-10 10:50:59 +00001086typedef struct _xmlDefAttrs xmlDefAttrs;
1087typedef xmlDefAttrs *xmlDefAttrsPtr;
1088struct _xmlDefAttrs {
1089 int nbAttrs; /* number of defaulted attributes on that element */
1090 int maxAttrs; /* the size of the array */
Nick Wellnhofer47496722017-05-31 16:46:39 +02001091#if __STDC_VERSION__ >= 199901L
1092 /* Using a C99 flexible array member avoids UBSan errors. */
1093 const xmlChar *values[]; /* array of localname/prefix/values/external */
1094#else
1095 const xmlChar *values[5];
1096#endif
Daniel Veillarde57ec792003-09-10 10:50:59 +00001097};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001098
1099/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001100 * xmlAttrNormalizeSpace:
1101 * @src: the source string
1102 * @dst: the target string
1103 *
1104 * Normalize the space in non CDATA attribute values:
1105 * If the attribute type is not CDATA, then the XML processor MUST further
1106 * process the normalized attribute value by discarding any leading and
1107 * trailing space (#x20) characters, and by replacing sequences of space
1108 * (#x20) characters by a single space (#x20) character.
1109 * Note that the size of dst need to be at least src, and if one doesn't need
1110 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1111 * passing src as dst is just fine.
1112 *
1113 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1114 * is needed.
1115 */
1116static xmlChar *
1117xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1118{
1119 if ((src == NULL) || (dst == NULL))
1120 return(NULL);
1121
1122 while (*src == 0x20) src++;
1123 while (*src != 0) {
1124 if (*src == 0x20) {
1125 while (*src == 0x20) src++;
1126 if (*src != 0)
1127 *dst++ = 0x20;
1128 } else {
1129 *dst++ = *src++;
1130 }
1131 }
1132 *dst = 0;
1133 if (dst == src)
1134 return(NULL);
1135 return(dst);
1136}
1137
1138/**
1139 * xmlAttrNormalizeSpace2:
1140 * @src: the source string
1141 *
1142 * Normalize the space in non CDATA attribute values, a slightly more complex
1143 * front end to avoid allocation problems when running on attribute values
1144 * coming from the input.
1145 *
1146 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1147 * is needed.
1148 */
1149static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001150xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001151{
1152 int i;
1153 int remove_head = 0;
1154 int need_realloc = 0;
1155 const xmlChar *cur;
1156
1157 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1158 return(NULL);
1159 i = *len;
1160 if (i <= 0)
1161 return(NULL);
1162
1163 cur = src;
1164 while (*cur == 0x20) {
1165 cur++;
1166 remove_head++;
1167 }
1168 while (*cur != 0) {
1169 if (*cur == 0x20) {
1170 cur++;
1171 if ((*cur == 0x20) || (*cur == 0)) {
1172 need_realloc = 1;
1173 break;
1174 }
1175 } else
1176 cur++;
1177 }
1178 if (need_realloc) {
1179 xmlChar *ret;
1180
1181 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1182 if (ret == NULL) {
1183 xmlErrMemory(ctxt, NULL);
1184 return(NULL);
1185 }
1186 xmlAttrNormalizeSpace(ret, ret);
1187 *len = (int) strlen((const char *)ret);
1188 return(ret);
1189 } else if (remove_head) {
1190 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001191 memmove(src, src + remove_head, 1 + *len);
1192 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001193 }
1194 return(NULL);
1195}
1196
1197/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001198 * xmlAddDefAttrs:
1199 * @ctxt: an XML parser context
1200 * @fullname: the element fullname
1201 * @fullattr: the attribute fullname
1202 * @value: the attribute value
1203 *
1204 * Add a defaulted attribute for an element
1205 */
1206static void
1207xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1208 const xmlChar *fullname,
1209 const xmlChar *fullattr,
1210 const xmlChar *value) {
1211 xmlDefAttrsPtr defaults;
1212 int len;
1213 const xmlChar *name;
1214 const xmlChar *prefix;
1215
Daniel Veillard6a31b832008-03-26 14:06:44 +00001216 /*
1217 * Allows to detect attribute redefinitions
1218 */
1219 if (ctxt->attsSpecial != NULL) {
1220 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1221 return;
1222 }
1223
Daniel Veillarde57ec792003-09-10 10:50:59 +00001224 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001225 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001226 if (ctxt->attsDefault == NULL)
1227 goto mem_error;
1228 }
1229
1230 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001231 * split the element name into prefix:localname , the string found
1232 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001233 */
1234 name = xmlSplitQName3(fullname, &len);
1235 if (name == NULL) {
1236 name = xmlDictLookup(ctxt->dict, fullname, -1);
1237 prefix = NULL;
1238 } else {
1239 name = xmlDictLookup(ctxt->dict, name, -1);
1240 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1241 }
1242
1243 /*
1244 * make sure there is some storage
1245 */
1246 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1247 if (defaults == NULL) {
1248 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001249 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001250 if (defaults == NULL)
1251 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001252 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001253 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001254 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1255 defaults, NULL) < 0) {
1256 xmlFree(defaults);
1257 goto mem_error;
1258 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001259 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001260 xmlDefAttrsPtr temp;
1261
1262 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001263 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001264 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001265 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001266 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001267 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001268 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1269 defaults, NULL) < 0) {
1270 xmlFree(defaults);
1271 goto mem_error;
1272 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001273 }
1274
1275 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001276 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001277 * are within the DTD and hen not associated to namespace names.
1278 */
1279 name = xmlSplitQName3(fullattr, &len);
1280 if (name == NULL) {
1281 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1282 prefix = NULL;
1283 } else {
1284 name = xmlDictLookup(ctxt->dict, name, -1);
1285 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1286 }
1287
Daniel Veillardae0765b2008-07-31 19:54:59 +00001288 defaults->values[5 * defaults->nbAttrs] = name;
1289 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001290 /* intern the string and precompute the end */
1291 len = xmlStrlen(value);
1292 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001293 defaults->values[5 * defaults->nbAttrs + 2] = value;
1294 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1295 if (ctxt->external)
1296 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1297 else
1298 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001299 defaults->nbAttrs++;
1300
1301 return;
1302
1303mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001304 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001305 return;
1306}
1307
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001308/**
1309 * xmlAddSpecialAttr:
1310 * @ctxt: an XML parser context
1311 * @fullname: the element fullname
1312 * @fullattr: the attribute fullname
1313 * @type: the attribute type
1314 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001315 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001316 */
1317static void
1318xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1319 const xmlChar *fullname,
1320 const xmlChar *fullattr,
1321 int type)
1322{
1323 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001324 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001325 if (ctxt->attsSpecial == NULL)
1326 goto mem_error;
1327 }
1328
Daniel Veillardac4118d2008-01-11 05:27:32 +00001329 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1330 return;
1331
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001332 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1333 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001334 return;
1335
1336mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001337 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001338 return;
1339}
1340
Daniel Veillard4432df22003-09-28 18:58:27 +00001341/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001342 * xmlCleanSpecialAttrCallback:
1343 *
1344 * Removes CDATA attributes from the special attribute table
1345 */
1346static void
1347xmlCleanSpecialAttrCallback(void *payload, void *data,
1348 const xmlChar *fullname, const xmlChar *fullattr,
1349 const xmlChar *unused ATTRIBUTE_UNUSED) {
1350 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1351
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001352 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001353 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1354 }
1355}
1356
1357/**
1358 * xmlCleanSpecialAttr:
1359 * @ctxt: an XML parser context
1360 *
1361 * Trim the list of attributes defined to remove all those of type
1362 * CDATA as they are not special. This call should be done when finishing
1363 * to parse the DTD and before starting to parse the document root.
1364 */
1365static void
1366xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1367{
1368 if (ctxt->attsSpecial == NULL)
1369 return;
1370
1371 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1372
1373 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1374 xmlHashFree(ctxt->attsSpecial, NULL);
1375 ctxt->attsSpecial = NULL;
1376 }
1377 return;
1378}
1379
/*
 * xmlLangSubtagScan:
 * @str:  start of a language subtag
 * @alpha:  set to 1 when the run holds only ASCII letters, 0 otherwise
 * @digit:  set to 1 when the run holds only ASCII digits, 0 otherwise
 *
 * Measures the run of ASCII letters and digits starting at @str. The
 * scan gives up after 9 characters since no subtag accepted by
 * xmlCheckLanguageID() is longer than 8.
 *
 * Returns the length of the run, between 0 and 9
 */
static int
xmlLangSubtagScan(const xmlChar *str, int *alpha, int *digit)
{
    int len = 0;

    *alpha = 1;
    *digit = 1;
    while (len < 9) {
        if (((str[len] >= 'A') && (str[len] <= 'Z')) ||
            ((str[len] >= 'a') && (str[len] <= 'z')))
            *digit = 0;
        else if ((str[len] >= '0') && (str[len] <= '9'))
            *alpha = 0;
        else
            break;
        len++;
    }
    return(len);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway
 *
 * What is actually checked: a 2 or 3 letter language followed, in that
 * order and each optionally, by one extlang, a script, a region and a
 * variant. A variant may hold digits (5 to 8 alphanumerics, or a digit
 * followed by 3 alphanumerics), so that tags like "de-1996" or
 * "de-DE-1996" are accepted. Whatever follows the '-' after a first
 * variant is not checked. A language made of 4 to 8 letters must stand
 * alone.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    /* kinds of subtags, in the only order in which they may show up */
    enum {
        XML_LANGTAG_LANGUAGE = 0,
        XML_LANGTAG_EXTLANG,
        XML_LANGTAG_SCRIPT,
        XML_LANGTAG_REGION,
        XML_LANGTAG_VARIANT
    };
    const xmlChar *cur = lang, *nxt;
    int last = XML_LANGTAG_LANGUAGE;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
        ((cur[0] == 'I') && (cur[1] == '-')) ||
        ((cur[0] == 'x') && (cur[1] == '-')) ||
        ((cur[0] == 'X') && (cur[1] == '-'))) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur += 2;
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
            cur++;
        return(cur[0] == 0);
    }

    /* the primary language subtag is made of letters only */
    nxt = cur;
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
        nxt++;
    if (nxt - cur >= 4) {
        /*
         * Reserved
         */
        if ((nxt - cur > 8) || (nxt[0] != 0))
            return(0);
        return(1);
    }
    if (nxt - cur < 2)
        return(0);

    /* we got an ISO 639 code, now walk the subtags which follow it */
    for (;;) {
        int len, alpha, digit;

        if (nxt[0] == 0)
            return(1);
        if (nxt[0] != '-')
            return(0);
        /* extensions and private use subtags not checked */
        if (last == XML_LANGTAG_VARIANT)
            return(1);

        cur = nxt + 1;
        len = xmlLangSubtagScan(cur, &alpha, &digit);
        nxt = cur + len;

        if (((len >= 5) && (len <= 8)) ||
            ((len == 4) && (cur[0] >= '0') && (cur[0] <= '9'))) {
            /* variant: 5*8alphanum or DIGIT 3alphanum */
            last = XML_LANGTAG_VARIANT;
        } else if ((alpha) && (len == 3) && (last < XML_LANGTAG_EXTLANG)) {
            last = XML_LANGTAG_EXTLANG;
        } else if ((alpha) && (len == 4) && (last < XML_LANGTAG_SCRIPT)) {
            last = XML_LANGTAG_SCRIPT;
        } else if ((((alpha) && (len == 2)) || ((digit) && (len == 3))) &&
                   (last < XML_LANGTAG_REGION)) {
            /* ISO 3166-1 code or UN M.49 code */
            last = XML_LANGTAG_REGION;
        } else {
            /* empty, too long, misplaced or malformed subtag */
            return(0);
        }
    }
}
1573
Owen Taylor3473f882001-02-23 17:55:21 +00001574/************************************************************************
1575 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001576 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001577 * *
1578 ************************************************************************/
1579
Daniel Veillard8ed10722009-08-20 19:17:36 +02001580static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1581 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001582
Daniel Veillard0fb18932003-09-07 09:14:37 +00001583#ifdef SAX2
1584/**
1585 * nsPush:
1586 * @ctxt: an XML parser context
1587 * @prefix: the namespace prefix or NULL
1588 * @URL: the namespace name
1589 *
1590 * Pushes a new parser namespace on top of the ns stack
1591 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001592 * Returns -1 in case of error, -2 if the namespace should be discarded
1593 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001594 */
1595static int
1596nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1597{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001598 if (ctxt->options & XML_PARSE_NSCLEAN) {
1599 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001600 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001601 if (ctxt->nsTab[i] == prefix) {
1602 /* in scope */
1603 if (ctxt->nsTab[i + 1] == URL)
1604 return(-2);
1605 /* out of scope keep it */
1606 break;
1607 }
1608 }
1609 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001610 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1611 ctxt->nsMax = 10;
1612 ctxt->nsNr = 0;
1613 ctxt->nsTab = (const xmlChar **)
1614 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1615 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001616 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001617 ctxt->nsMax = 0;
1618 return (-1);
1619 }
1620 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001621 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001622 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001623 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1624 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1625 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001626 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001627 ctxt->nsMax /= 2;
1628 return (-1);
1629 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001630 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001631 }
1632 ctxt->nsTab[ctxt->nsNr++] = prefix;
1633 ctxt->nsTab[ctxt->nsNr++] = URL;
1634 return (ctxt->nsNr);
1635}
1636/**
1637 * nsPop:
1638 * @ctxt: an XML parser context
1639 * @nr: the number to pop
1640 *
1641 * Pops the top @nr parser prefix/namespace from the ns stack
1642 *
1643 * Returns the number of namespaces removed
1644 */
1645static int
1646nsPop(xmlParserCtxtPtr ctxt, int nr)
1647{
1648 int i;
1649
1650 if (ctxt->nsTab == NULL) return(0);
1651 if (ctxt->nsNr < nr) {
1652 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1653 nr = ctxt->nsNr;
1654 }
1655 if (ctxt->nsNr <= 0)
1656 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001657
Daniel Veillard0fb18932003-09-07 09:14:37 +00001658 for (i = 0;i < nr;i++) {
1659 ctxt->nsNr--;
1660 ctxt->nsTab[ctxt->nsNr] = NULL;
1661 }
1662 return(nr);
1663}
1664#endif
1665
1666static int
1667xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1668 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001669 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001670 int maxatts;
1671
1672 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001673 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001674 atts = (const xmlChar **)
1675 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001676 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001677 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001678 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1679 if (attallocs == NULL) goto mem_error;
1680 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001681 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001682 } else if (nr + 5 > ctxt->maxatts) {
1683 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001684 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1685 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001686 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001687 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001688 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1689 (maxatts / 5) * sizeof(int));
1690 if (attallocs == NULL) goto mem_error;
1691 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001692 ctxt->maxatts = maxatts;
1693 }
1694 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001695mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001696 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001697 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001698}
1699
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001700/**
1701 * inputPush:
1702 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001703 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001704 *
1705 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001706 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001707 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001708 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001709int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001710inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1711{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001712 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001713 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001714 if (ctxt->inputNr >= ctxt->inputMax) {
1715 ctxt->inputMax *= 2;
1716 ctxt->inputTab =
1717 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1718 ctxt->inputMax *
1719 sizeof(ctxt->inputTab[0]));
1720 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001721 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001722 xmlFreeInputStream(value);
1723 ctxt->inputMax /= 2;
1724 value = NULL;
1725 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001726 }
1727 }
1728 ctxt->inputTab[ctxt->inputNr] = value;
1729 ctxt->input = value;
1730 return (ctxt->inputNr++);
1731}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001732/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001733 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001734 * @ctxt: an XML parser context
1735 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001736 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001737 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001738 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001739 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001740xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001741inputPop(xmlParserCtxtPtr ctxt)
1742{
1743 xmlParserInputPtr ret;
1744
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001745 if (ctxt == NULL)
1746 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001747 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001748 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001749 ctxt->inputNr--;
1750 if (ctxt->inputNr > 0)
1751 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1752 else
1753 ctxt->input = NULL;
1754 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001755 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001756 return (ret);
1757}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001758/**
1759 * nodePush:
1760 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001761 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001762 *
1763 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001764 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001765 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001766 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001767int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001768nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1769{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001770 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001771 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001772 xmlNodePtr *tmp;
1773
1774 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1775 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001776 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001777 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001778 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001779 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001780 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001781 ctxt->nodeTab = tmp;
1782 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001783 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001784 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1785 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001786 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001787 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001788 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001789 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001790 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001791 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001792 ctxt->nodeTab[ctxt->nodeNr] = value;
1793 ctxt->node = value;
1794 return (ctxt->nodeNr++);
1795}
Daniel Veillard8915c152008-08-26 13:05:34 +00001796
Daniel Veillard1c732d22002-11-30 11:22:59 +00001797/**
1798 * nodePop:
1799 * @ctxt: an XML parser context
1800 *
1801 * Pops the top element node from the node stack
1802 *
1803 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001804 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001805xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001806nodePop(xmlParserCtxtPtr ctxt)
1807{
1808 xmlNodePtr ret;
1809
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001810 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001811 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001812 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001813 ctxt->nodeNr--;
1814 if (ctxt->nodeNr > 0)
1815 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1816 else
1817 ctxt->node = NULL;
1818 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001819 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001820 return (ret);
1821}
Daniel Veillarda2351322004-06-27 12:08:10 +00001822
1823#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001824/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001825 * nameNsPush:
1826 * @ctxt: an XML parser context
1827 * @value: the element name
1828 * @prefix: the element prefix
1829 * @URI: the element namespace name
1830 *
1831 * Pushes a new element name/prefix/URL on top of the name stack
1832 *
1833 * Returns -1 in case of error, the index in the stack otherwise
1834 */
1835static int
1836nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1837 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1838{
1839 if (ctxt->nameNr >= ctxt->nameMax) {
1840 const xmlChar * *tmp;
1841 void **tmp2;
1842 ctxt->nameMax *= 2;
1843 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1844 ctxt->nameMax *
1845 sizeof(ctxt->nameTab[0]));
1846 if (tmp == NULL) {
1847 ctxt->nameMax /= 2;
1848 goto mem_error;
1849 }
1850 ctxt->nameTab = tmp;
1851 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1852 ctxt->nameMax * 3 *
1853 sizeof(ctxt->pushTab[0]));
1854 if (tmp2 == NULL) {
1855 ctxt->nameMax /= 2;
1856 goto mem_error;
1857 }
1858 ctxt->pushTab = tmp2;
1859 }
1860 ctxt->nameTab[ctxt->nameNr] = value;
1861 ctxt->name = value;
1862 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1863 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001864 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001865 return (ctxt->nameNr++);
1866mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001867 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001868 return (-1);
1869}
1870/**
1871 * nameNsPop:
1872 * @ctxt: an XML parser context
1873 *
1874 * Pops the top element/prefix/URI name from the name stack
1875 *
1876 * Returns the name just removed
1877 */
1878static const xmlChar *
1879nameNsPop(xmlParserCtxtPtr ctxt)
1880{
1881 const xmlChar *ret;
1882
1883 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001884 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001885 ctxt->nameNr--;
1886 if (ctxt->nameNr > 0)
1887 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1888 else
1889 ctxt->name = NULL;
1890 ret = ctxt->nameTab[ctxt->nameNr];
1891 ctxt->nameTab[ctxt->nameNr] = NULL;
1892 return (ret);
1893}
Daniel Veillarda2351322004-06-27 12:08:10 +00001894#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001895
1896/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001897 * namePush:
1898 * @ctxt: an XML parser context
1899 * @value: the element name
1900 *
1901 * Pushes a new element name on top of the name stack
1902 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001903 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001904 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001905int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001906namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001907{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001908 if (ctxt == NULL) return (-1);
1909
Daniel Veillard1c732d22002-11-30 11:22:59 +00001910 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001911 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001912 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001913 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001914 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001915 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001916 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001917 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001918 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001919 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001920 }
1921 ctxt->nameTab[ctxt->nameNr] = value;
1922 ctxt->name = value;
1923 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001924mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001925 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001926 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001927}
1928/**
1929 * namePop:
1930 * @ctxt: an XML parser context
1931 *
1932 * Pops the top element name from the name stack
1933 *
1934 * Returns the name just removed
1935 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001936const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001937namePop(xmlParserCtxtPtr ctxt)
1938{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001939 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001940
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001941 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1942 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001943 ctxt->nameNr--;
1944 if (ctxt->nameNr > 0)
1945 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1946 else
1947 ctxt->name = NULL;
1948 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001949 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001950 return (ret);
1951}
Owen Taylor3473f882001-02-23 17:55:21 +00001952
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001953static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001954 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001955 int *tmp;
1956
Owen Taylor3473f882001-02-23 17:55:21 +00001957 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001958 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1959 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1960 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001961 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001962 ctxt->spaceMax /=2;
1963 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001964 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001965 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001966 }
1967 ctxt->spaceTab[ctxt->spaceNr] = val;
1968 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1969 return(ctxt->spaceNr++);
1970}
1971
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001972static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001973 int ret;
1974 if (ctxt->spaceNr <= 0) return(0);
1975 ctxt->spaceNr--;
1976 if (ctxt->spaceNr > 0)
1977 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1978 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001979 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001980 ret = ctxt->spaceTab[ctxt->spaceNr];
1981 ctxt->spaceTab[ctxt->spaceNr] = -1;
1982 return(ret);
1983}
1984
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2019
/* direct accessors to the current input, they all expect a local `ctxt' */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes found at s are
 * c1 ... cn; the comparison stops at the first mismatch. Every argument
 * is parenthesized so that expressions such as `ptr + 1' or a
 * conditional can be passed safely.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2043
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * column and the character count but not the line number. Afterwards a
 * '%' at the new position is handed to xmlParserHandlePEReference(),
 * and the input is popped when it is exhausted and cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP() but walks the xmlChars one by one so that
 * line and column stay right when newlines are skipped. val is
 * parenthesized in the loop test so that any expression can be passed;
 * it is evaluated on each iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2066
Daniel Veillarda880b122003-04-21 21:36:41 +00002067#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002068 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2069 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002070 xmlSHRINK (ctxt);
2071
2072static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2073 xmlParserInputShrink(ctxt->input);
2074 if ((*ctxt->input->cur == 0) &&
2075 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2076 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002077 }
Owen Taylor3473f882001-02-23 17:55:21 +00002078
Daniel Veillarda880b122003-04-21 21:36:41 +00002079#define GROW if ((ctxt->progressive == 0) && \
2080 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002081 xmlGROW (ctxt);
2082
2083static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002084 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2085 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2086
2087 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2088 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002089 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002090 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2091 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002092 xmlHaltParser(ctxt);
2093 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002094 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002095 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002096 if ((ctxt->input->cur > ctxt->input->end) ||
2097 (ctxt->input->cur < ctxt->input->base)) {
2098 xmlHaltParser(ctxt);
2099 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2100 return;
2101 }
Daniel Veillard59df7832010-02-02 10:24:01 +01002102 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002103 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2104 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002105}
Owen Taylor3473f882001-02-23 17:55:21 +00002106
/* skip the blanks found at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* move to the next character, see xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip one xmlChar, bumping column and character count, then try
 * to grow the input when a 0 byte is reached. The line number is not
 * updated.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is l xmlChars long,
 * keeping line and column up to date. ctxt->nbChars is not updated.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

/* l must be an lvalue, it is passed by address to receive a length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v to the buffer b at index i
 * and advance i, storing a single xmlChar when l is 1 and going through
 * xmlCopyCharMultiByte() otherwise.
 * NOTE: expands to a bare if/else statement, and i is evaluated more
 * than once.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002132
2133/**
2134 * xmlSkipBlankChars:
2135 * @ctxt: the XML parser context
2136 *
2137 * skip all blanks character found at that point in the input streams.
2138 * It pops up finished entities in the process if allowable at that point.
2139 *
2140 * Returns the number of space chars skipped
2141 */
2142
2143int
2144xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002145 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002146
2147 /*
2148 * It's Okay to use CUR/NEXT here since all the blanks are on
2149 * the ASCII range.
2150 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002151 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2152 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002153 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002154 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002155 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002156 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002157 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002158 if (*cur == '\n') {
2159 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002160 } else {
2161 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002162 }
2163 cur++;
2164 res++;
2165 if (*cur == 0) {
2166 ctxt->input->cur = cur;
2167 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2168 cur = ctxt->input->cur;
2169 }
2170 }
2171 ctxt->input->cur = cur;
2172 } else {
2173 int cur;
2174 do {
2175 cur = CUR;
Daniel Veillard3bd6ae12015-11-20 15:06:02 +08002176 while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
2177 (ctxt->instate != XML_PARSER_EOF))) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002178 NEXT;
2179 cur = CUR;
2180 res++;
2181 }
2182 while ((cur == 0) && (ctxt->inputNr > 1) &&
2183 (ctxt->instate != XML_PARSER_COMMENT)) {
2184 xmlPopInput(ctxt);
2185 cur = CUR;
2186 }
2187 /*
2188 * Need to handle support of entities branching here
2189 */
2190 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
Daniel Veillard3bd6ae12015-11-20 15:06:02 +08002191 } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
2192 (ctxt->instate != XML_PARSER_EOF));
Daniel Veillard02141ea2001-04-30 11:46:40 +00002193 }
Owen Taylor3473f882001-02-23 17:55:21 +00002194 return(res);
2195}
2196
2197/************************************************************************
2198 * *
2199 * Commodity functions to handle entities *
2200 * *
2201 ************************************************************************/
2202
2203/**
2204 * xmlPopInput:
2205 * @ctxt: an XML parser context
2206 *
2207 * xmlPopInput: the current input pointed by ctxt->input came to an end
2208 * pop it and return the next char.
2209 *
2210 * Returns the current xmlChar in the parser context
2211 */
2212xmlChar
2213xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002214 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002215 if (xmlParserDebugEntities)
2216 xmlGenericError(xmlGenericErrorContext,
2217 "Popping input %d\n", ctxt->inputNr);
2218 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002219 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002220 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2221 return(xmlPopInput(ctxt));
2222 return(CUR);
2223}
2224
2225/**
2226 * xmlPushInput:
2227 * @ctxt: an XML parser context
2228 * @input: an XML parser input fragment (entity, XML fragment ...).
2229 *
2230 * xmlPushInput: switch to a new input stream which is stacked on top
2231 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002232 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002233 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002234int
Owen Taylor3473f882001-02-23 17:55:21 +00002235xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002236 int ret;
2237 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002238
2239 if (xmlParserDebugEntities) {
2240 if ((ctxt->input != NULL) && (ctxt->input->filename))
2241 xmlGenericError(xmlGenericErrorContext,
2242 "%s(%d): ", ctxt->input->filename,
2243 ctxt->input->line);
2244 xmlGenericError(xmlGenericErrorContext,
2245 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2246 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002247 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002248 if (ctxt->instate == XML_PARSER_EOF)
2249 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002250 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002251 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002252}
2253
2254/**
2255 * xmlParseCharRef:
2256 * @ctxt: an XML parser context
2257 *
2258 * parse Reference declarations
2259 *
2260 * [66] CharRef ::= '&#' [0-9]+ ';' |
2261 * '&#x' [0-9a-fA-F]+ ';'
2262 *
2263 * [ WFC: Legal Character ]
2264 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002265 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002266 *
2267 * Returns the value parsed (as an int), 0 in case of error
2268 */
2269int
2270xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002271 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002272 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002273 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002274
Owen Taylor3473f882001-02-23 17:55:21 +00002275 /*
2276 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2277 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002278 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002279 (NXT(2) == 'x')) {
2280 SKIP(3);
2281 GROW;
2282 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002283 if (count++ > 20) {
2284 count = 0;
2285 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002286 if (ctxt->instate == XML_PARSER_EOF)
2287 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002288 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002289 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002290 val = val * 16 + (CUR - '0');
2291 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2292 val = val * 16 + (CUR - 'a') + 10;
2293 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2294 val = val * 16 + (CUR - 'A') + 10;
2295 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002296 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002297 val = 0;
2298 break;
2299 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002300 if (val > 0x10FFFF)
2301 outofrange = val;
2302
Owen Taylor3473f882001-02-23 17:55:21 +00002303 NEXT;
2304 count++;
2305 }
2306 if (RAW == ';') {
2307 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002308 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002309 ctxt->nbChars ++;
2310 ctxt->input->cur++;
2311 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002312 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002313 SKIP(2);
2314 GROW;
2315 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002316 if (count++ > 20) {
2317 count = 0;
2318 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002319 if (ctxt->instate == XML_PARSER_EOF)
2320 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002321 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002322 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002323 val = val * 10 + (CUR - '0');
2324 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002325 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002326 val = 0;
2327 break;
2328 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002329 if (val > 0x10FFFF)
2330 outofrange = val;
2331
Owen Taylor3473f882001-02-23 17:55:21 +00002332 NEXT;
2333 count++;
2334 }
2335 if (RAW == ';') {
2336 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002337 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002338 ctxt->nbChars ++;
2339 ctxt->input->cur++;
2340 }
2341 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002342 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002343 }
2344
2345 /*
2346 * [ WFC: Legal Character ]
2347 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002348 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002349 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002350 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002351 return(val);
2352 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002353 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2354 "xmlParseCharRef: invalid xmlChar value %d\n",
2355 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002356 }
2357 return(0);
2358}
2359
2360/**
2361 * xmlParseStringCharRef:
2362 * @ctxt: an XML parser context
2363 * @str: a pointer to an index in the string
2364 *
2365 * parse Reference declarations, variant parsing from a string rather
2366 * than an an input flow.
2367 *
2368 * [66] CharRef ::= '&#' [0-9]+ ';' |
2369 * '&#x' [0-9a-fA-F]+ ';'
2370 *
2371 * [ WFC: Legal Character ]
2372 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002373 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002374 *
2375 * Returns the value parsed (as an int), 0 in case of error, str will be
2376 * updated to the current value of the index
2377 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002378static int
Owen Taylor3473f882001-02-23 17:55:21 +00002379xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2380 const xmlChar *ptr;
2381 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002382 unsigned int val = 0;
2383 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002384
2385 if ((str == NULL) || (*str == NULL)) return(0);
2386 ptr = *str;
2387 cur = *ptr;
2388 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2389 ptr += 3;
2390 cur = *ptr;
2391 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002392 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002393 val = val * 16 + (cur - '0');
2394 else if ((cur >= 'a') && (cur <= 'f'))
2395 val = val * 16 + (cur - 'a') + 10;
2396 else if ((cur >= 'A') && (cur <= 'F'))
2397 val = val * 16 + (cur - 'A') + 10;
2398 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002399 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002400 val = 0;
2401 break;
2402 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002403 if (val > 0x10FFFF)
2404 outofrange = val;
2405
Owen Taylor3473f882001-02-23 17:55:21 +00002406 ptr++;
2407 cur = *ptr;
2408 }
2409 if (cur == ';')
2410 ptr++;
2411 } else if ((cur == '&') && (ptr[1] == '#')){
2412 ptr += 2;
2413 cur = *ptr;
2414 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002415 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002416 val = val * 10 + (cur - '0');
2417 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002418 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002419 val = 0;
2420 break;
2421 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002422 if (val > 0x10FFFF)
2423 outofrange = val;
2424
Owen Taylor3473f882001-02-23 17:55:21 +00002425 ptr++;
2426 cur = *ptr;
2427 }
2428 if (cur == ';')
2429 ptr++;
2430 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002431 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002432 return(0);
2433 }
2434 *str = ptr;
2435
2436 /*
2437 * [ WFC: Legal Character ]
2438 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002439 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002440 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002441 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002442 return(val);
2443 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002444 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2445 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2446 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002447 }
2448 return(0);
2449}
2450
2451/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002452 * xmlNewBlanksWrapperInputStream:
2453 * @ctxt: an XML parser context
2454 * @entity: an Entity pointer
2455 *
2456 * Create a new input stream for wrapping
2457 * blanks around a PEReference
2458 *
2459 * Returns the new input stream or NULL
2460 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002461
Daniel Veillardf5582f12002-06-11 10:08:16 +00002462static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002463
Daniel Veillardf4862f02002-09-10 11:13:43 +00002464static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002465xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2466 xmlParserInputPtr input;
2467 xmlChar *buffer;
2468 size_t length;
2469 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002470 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2471 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002472 return(NULL);
2473 }
2474 if (xmlParserDebugEntities)
2475 xmlGenericError(xmlGenericErrorContext,
2476 "new blanks wrapper for entity: %s\n", entity->name);
2477 input = xmlNewInputStream(ctxt);
2478 if (input == NULL) {
2479 return(NULL);
2480 }
2481 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002482 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002483 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002484 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002485 xmlFree(input);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002486 return(NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002487 }
2488 buffer [0] = ' ';
2489 buffer [1] = '%';
2490 buffer [length-3] = ';';
2491 buffer [length-2] = ' ';
2492 buffer [length-1] = 0;
2493 memcpy(buffer + 2, entity->name, length - 5);
2494 input->free = deallocblankswrapper;
2495 input->base = buffer;
2496 input->cur = buffer;
2497 input->length = length;
2498 input->end = &buffer[length];
2499 return(input);
2500}
2501
2502/**
Owen Taylor3473f882001-02-23 17:55:21 +00002503 * xmlParserHandlePEReference:
2504 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002505 *
Owen Taylor3473f882001-02-23 17:55:21 +00002506 * [69] PEReference ::= '%' Name ';'
2507 *
2508 * [ WFC: No Recursion ]
2509 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002510 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002511 *
2512 * [ WFC: Entity Declared ]
2513 * In a document without any DTD, a document with only an internal DTD
2514 * subset which contains no parameter entity references, or a document
2515 * with "standalone='yes'", ... ... The declaration of a parameter
2516 * entity must precede any reference to it...
2517 *
2518 * [ VC: Entity Declared ]
2519 * In a document with an external subset or external parameter entities
2520 * with "standalone='no'", ... ... The declaration of a parameter entity
2521 * must precede any reference to it...
2522 *
2523 * [ WFC: In DTD ]
2524 * Parameter-entity references may only appear in the DTD.
2525 * NOTE: misleading but this is handled.
2526 *
2527 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002528 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002529 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002530 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002531 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002532 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002533 */
2534void
2535xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002536 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002537 xmlEntityPtr entity = NULL;
2538 xmlParserInputPtr input;
2539
Owen Taylor3473f882001-02-23 17:55:21 +00002540 if (RAW != '%') return;
2541 switch(ctxt->instate) {
2542 case XML_PARSER_CDATA_SECTION:
2543 return;
2544 case XML_PARSER_COMMENT:
2545 return;
2546 case XML_PARSER_START_TAG:
2547 return;
2548 case XML_PARSER_END_TAG:
2549 return;
2550 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002551 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002552 return;
2553 case XML_PARSER_PROLOG:
2554 case XML_PARSER_START:
2555 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002556 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002557 return;
2558 case XML_PARSER_ENTITY_DECL:
2559 case XML_PARSER_CONTENT:
2560 case XML_PARSER_ATTRIBUTE_VALUE:
2561 case XML_PARSER_PI:
2562 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002563 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002564 /* we just ignore it there */
2565 return;
2566 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002567 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002568 return;
2569 case XML_PARSER_ENTITY_VALUE:
2570 /*
2571 * NOTE: in the case of entity values, we don't do the
2572 * substitution here since we need the literal
2573 * entity value to be able to save the internal
2574 * subset of the document.
2575 * This will be handled by xmlStringDecodeEntities
2576 */
2577 return;
2578 case XML_PARSER_DTD:
2579 /*
2580 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2581 * In the internal DTD subset, parameter-entity references
2582 * can occur only where markup declarations can occur, not
2583 * within markup declarations.
2584 * In that case this is handled in xmlParseMarkupDecl
2585 */
2586 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2587 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002588 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002589 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002590 break;
2591 case XML_PARSER_IGNORE:
2592 return;
2593 }
2594
2595 NEXT;
2596 name = xmlParseName(ctxt);
2597 if (xmlParserDebugEntities)
2598 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002599 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002600 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002601 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002602 } else {
2603 if (RAW == ';') {
2604 NEXT;
2605 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2606 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillarde50ba812013-04-11 15:54:51 +08002607 if (ctxt->instate == XML_PARSER_EOF)
2608 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002609 if (entity == NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002610
Owen Taylor3473f882001-02-23 17:55:21 +00002611 /*
2612 * [ WFC: Entity Declared ]
2613 * In a document without any DTD, a document with only an
2614 * internal DTD subset which contains no parameter entity
2615 * references, or a document with "standalone='yes'", ...
2616 * ... The declaration of a parameter entity must precede
2617 * any reference to it...
2618 */
2619 if ((ctxt->standalone == 1) ||
2620 ((ctxt->hasExternalSubset == 0) &&
2621 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002622 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002623 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002624 } else {
2625 /*
2626 * [ VC: Entity Declared ]
2627 * In a document with an external subset or external
2628 * parameter entities with "standalone='no'", ...
2629 * ... The declaration of a parameter entity must precede
2630 * any reference to it...
2631 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002632 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2633 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2634 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002635 name, NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002636 } else
Daniel Veillard24eb9782003-10-04 21:08:09 +00002637 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2638 "PEReference: %%%s; not found\n",
2639 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002640 ctxt->valid = 0;
2641 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002642 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002643 } else if (ctxt->input->free != deallocblankswrapper) {
2644 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002645 if (xmlPushInput(ctxt, input) < 0)
2646 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002647 } else {
2648 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2649 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002650 xmlChar start[4];
2651 xmlCharEncoding enc;
2652
Owen Taylor3473f882001-02-23 17:55:21 +00002653 /*
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002654 * Note: external parameter entities will not be loaded, it
2655 * is not required for a non-validating parser, unless the
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002656 * option of validating, or substituting entities were
2657 * given. Doing so is far more secure as the parser will
2658 * only process data coming from the document entity by
2659 * default.
2660 */
2661 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2662 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2663 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002664 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2665 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2666 (ctxt->replaceEntities == 0) &&
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002667 (ctxt->validate == 0))
2668 return;
2669
2670 /*
Owen Taylor3473f882001-02-23 17:55:21 +00002671 * handle the extra spaces added before and after
2672 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002673 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002674 */
2675 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002676 if (xmlPushInput(ctxt, input) < 0)
2677 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002678
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002679 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +00002680 * Get the 4 first bytes and decode the charset
2681 * if enc != XML_CHAR_ENCODING_NONE
2682 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002683 * Note that, since we may have some non-UTF8
2684 * encoding (like UTF16, bug 135229), the 'length'
2685 * is not known, but we can calculate based upon
2686 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002687 */
2688 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002689 if (ctxt->instate == XML_PARSER_EOF)
2690 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002691 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002692 start[0] = RAW;
2693 start[1] = NXT(1);
2694 start[2] = NXT(2);
2695 start[3] = NXT(3);
2696 enc = xmlDetectCharEncoding(start, 4);
2697 if (enc != XML_CHAR_ENCODING_NONE) {
2698 xmlSwitchEncoding(ctxt, enc);
2699 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002700 }
2701
Owen Taylor3473f882001-02-23 17:55:21 +00002702 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002703 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2704 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002705 xmlParseTextDecl(ctxt);
2706 }
Owen Taylor3473f882001-02-23 17:55:21 +00002707 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002708 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2709 "PEReference: %s is not a parameter entity\n",
2710 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002711 }
2712 }
2713 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002714 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002715 }
Owen Taylor3473f882001-02-23 17:55:21 +00002716 }
2717}
2718
2719/*
2720 * Macro used to grow the current buffer.
Daniel Veillard459eeb92012-07-17 16:19:17 +08002721 * buffer##_size is expected to be a size_t
2722 * mem_error: is expected to handle memory allocation failures
Owen Taylor3473f882001-02-23 17:55:21 +00002723 */
Daniel Veillard0161e632008-08-28 15:36:32 +00002724#define growBuffer(buffer, n) { \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002725 xmlChar *tmp; \
Daniel Veillard459eeb92012-07-17 16:19:17 +08002726 size_t new_size = buffer##_size * 2 + n; \
2727 if (new_size < buffer##_size) goto mem_error; \
2728 tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002729 if (tmp == NULL) goto mem_error; \
2730 buffer = tmp; \
Daniel Veillard459eeb92012-07-17 16:19:17 +08002731 buffer##_size = new_size; \
Owen Taylor3473f882001-02-23 17:55:21 +00002732}
2733
2734/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002735 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002736 * @ctxt: the parser context
2737 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002738 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002739 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2740 * @end: an end marker xmlChar, 0 if none
2741 * @end2: an end marker xmlChar, 0 if none
2742 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002743 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002744 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002745 *
2746 * [67] Reference ::= EntityRef | CharRef
2747 *
2748 * [69] PEReference ::= '%' Name ';'
2749 *
2750 * Returns A newly allocated string with the substitution done. The caller
2751 * must deallocate it !
2752 */
2753xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002754xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2755 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002756 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002757 size_t buffer_size = 0;
2758 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002759
2760 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002761 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002762 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002763 xmlEntityPtr ent;
2764 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002765
Daniel Veillarda82b1822004-11-08 16:24:57 +00002766 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002767 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002768 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002769
Daniel Veillard0161e632008-08-28 15:36:32 +00002770 if (((ctxt->depth > 40) &&
2771 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2772 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002773 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002774 return(NULL);
2775 }
2776
2777 /*
2778 * allocate a translation buffer.
2779 */
2780 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002781 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002782 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002783
2784 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002785 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002786 * we are operating on already parsed values.
2787 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002788 if (str < last)
2789 c = CUR_SCHAR(str, l);
2790 else
2791 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002792 while ((c != 0) && (c != end) && /* non input consuming loop */
2793 (c != end2) && (c != end3)) {
2794
2795 if (c == 0) break;
2796 if ((c == '&') && (str[1] == '#')) {
2797 int val = xmlParseStringCharRef(ctxt, &str);
2798 if (val != 0) {
2799 COPY_BUF(0,buffer,nbchars,val);
2800 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002801 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002802 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002803 }
Owen Taylor3473f882001-02-23 17:55:21 +00002804 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2805 if (xmlParserDebugEntities)
2806 xmlGenericError(xmlGenericErrorContext,
2807 "String decoding Entity Reference: %.30s\n",
2808 str);
2809 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002810 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2811 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002812 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002813 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002814 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002815 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002816 if ((ent != NULL) &&
2817 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2818 if (ent->content != NULL) {
2819 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002820 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002821 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002822 }
Owen Taylor3473f882001-02-23 17:55:21 +00002823 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002824 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2825 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002826 }
2827 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002828 ctxt->depth++;
2829 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2830 0, 0, 0);
2831 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002832
David Drysdale69030712015-11-20 11:13:45 +08002833 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2834 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2835 goto int_error;
2836
Owen Taylor3473f882001-02-23 17:55:21 +00002837 if (rep != NULL) {
2838 current = rep;
2839 while (*current != 0) { /* non input consuming loop */
2840 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002841 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002842 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002843 goto int_error;
2844 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002845 }
2846 }
2847 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002848 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002849 }
2850 } else if (ent != NULL) {
2851 int i = xmlStrlen(ent->name);
2852 const xmlChar *cur = ent->name;
2853
2854 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002855 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002856 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002857 }
2858 for (;i > 0;i--)
2859 buffer[nbchars++] = *cur++;
2860 buffer[nbchars++] = ';';
2861 }
2862 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2863 if (xmlParserDebugEntities)
2864 xmlGenericError(xmlGenericErrorContext,
2865 "String decoding PE Reference: %.30s\n", str);
2866 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002867 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2868 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002869 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002870 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002871 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002872 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002873 if (ent->content == NULL) {
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002874 /*
2875 * Note: external parsed entities will not be loaded,
2876 * it is not required for a non-validating parser to
2877 * complete external PEreferences coming from the
2878 * internal subset
2879 */
2880 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2881 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2882 (ctxt->validate != 0)) {
2883 xmlLoadEntityContent(ctxt, ent);
2884 } else {
2885 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2886 "not validating will not read content for PE entity %s\n",
2887 ent->name, NULL);
2888 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002889 }
Owen Taylor3473f882001-02-23 17:55:21 +00002890 ctxt->depth++;
2891 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2892 0, 0, 0);
2893 ctxt->depth--;
2894 if (rep != NULL) {
2895 current = rep;
2896 while (*current != 0) { /* non input consuming loop */
2897 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002898 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002899 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002900 goto int_error;
2901 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002902 }
2903 }
2904 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002905 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002906 }
2907 }
2908 } else {
2909 COPY_BUF(l,buffer,nbchars,c);
2910 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002911 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2912 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002913 }
2914 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002915 if (str < last)
2916 c = CUR_SCHAR(str, l);
2917 else
2918 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002919 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002920 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002921 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002922
2923mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002924 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002925int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002926 if (rep != NULL)
2927 xmlFree(rep);
2928 if (buffer != NULL)
2929 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002930 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002931}
2932
Daniel Veillarde57ec792003-09-10 10:50:59 +00002933/**
2934 * xmlStringDecodeEntities:
2935 * @ctxt: the parser context
2936 * @str: the input string
2937 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2938 * @end: an end marker xmlChar, 0 if none
2939 * @end2: an end marker xmlChar, 0 if none
2940 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002941 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002942 * Takes a entity string content and process to do the adequate substitutions.
2943 *
2944 * [67] Reference ::= EntityRef | CharRef
2945 *
2946 * [69] PEReference ::= '%' Name ';'
2947 *
2948 * Returns A newly allocated string with the substitution done. The caller
2949 * must deallocate it !
2950 */
2951xmlChar *
2952xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2953 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002954 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002955 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2956 end, end2, end3));
2957}
Owen Taylor3473f882001-02-23 17:55:21 +00002958
2959/************************************************************************
2960 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002961 * Commodity functions, cleanup needed ? *
2962 * *
2963 ************************************************************************/
2964
2965/**
2966 * areBlanks:
2967 * @ctxt: an XML parser context
2968 * @str: a xmlChar *
2969 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002970 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002971 *
2972 * Is this a sequence of blank chars that one can ignore ?
2973 *
2974 * Returns 1 if ignorable 0 otherwise.
2975 */
2976
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002977static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2978 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002979 int i, ret;
2980 xmlNodePtr lastChild;
2981
Daniel Veillard05c13a22001-09-09 08:38:09 +00002982 /*
2983 * Don't spend time trying to differentiate them, the same callback is
2984 * used !
2985 */
2986 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002987 return(0);
2988
Owen Taylor3473f882001-02-23 17:55:21 +00002989 /*
2990 * Check for xml:space value.
2991 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002992 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2993 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002994 return(0);
2995
2996 /*
2997 * Check that the string is made of blanks
2998 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002999 if (blank_chars == 0) {
3000 for (i = 0;i < len;i++)
3001 if (!(IS_BLANK_CH(str[i]))) return(0);
3002 }
Owen Taylor3473f882001-02-23 17:55:21 +00003003
3004 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003005 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00003006 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00003007 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00003008 if (ctxt->myDoc != NULL) {
3009 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3010 if (ret == 0) return(1);
3011 if (ret == 1) return(0);
3012 }
3013
3014 /*
3015 * Otherwise, heuristic :-\
3016 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00003017 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00003018 if ((ctxt->node->children == NULL) &&
3019 (RAW == '<') && (NXT(1) == '/')) return(0);
3020
3021 lastChild = xmlGetLastChild(ctxt->node);
3022 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00003023 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
3024 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00003025 } else if (xmlNodeIsText(lastChild))
3026 return(0);
3027 else if ((ctxt->node->children != NULL) &&
3028 (xmlNodeIsText(ctxt->node->children)))
3029 return(0);
3030 return(1);
3031}
3032
Owen Taylor3473f882001-02-23 17:55:21 +00003033/************************************************************************
3034 * *
3035 * Extra stuff for namespace support *
3036 * Relates to http://www.w3.org/TR/WD-xml-names *
3037 * *
3038 ************************************************************************/
3039
3040/**
3041 * xmlSplitQName:
3042 * @ctxt: an XML parser context
3043 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003044 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00003045 *
3046 * parse an UTF8 encoded XML qualified name string
3047 *
3048 * [NS 5] QName ::= (Prefix ':')? LocalPart
3049 *
3050 * [NS 6] Prefix ::= NCName
3051 *
3052 * [NS 7] LocalPart ::= NCName
3053 *
3054 * Returns the local part, and prefix is updated
3055 * to get the Prefix if any.
3056 */
3057
3058xmlChar *
3059xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3060 xmlChar buf[XML_MAX_NAMELEN + 5];
3061 xmlChar *buffer = NULL;
3062 int len = 0;
3063 int max = XML_MAX_NAMELEN;
3064 xmlChar *ret = NULL;
3065 const xmlChar *cur = name;
3066 int c;
3067
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003068 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003069 *prefix = NULL;
3070
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00003071 if (cur == NULL) return(NULL);
3072
Owen Taylor3473f882001-02-23 17:55:21 +00003073#ifndef XML_XML_NAMESPACE
3074 /* xml: prefix is not really a namespace */
3075 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3076 (cur[2] == 'l') && (cur[3] == ':'))
3077 return(xmlStrdup(name));
3078#endif
3079
Daniel Veillard597bc482003-07-24 16:08:28 +00003080 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00003081 if (cur[0] == ':')
3082 return(xmlStrdup(name));
3083
3084 c = *cur++;
3085 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3086 buf[len++] = c;
3087 c = *cur++;
3088 }
3089 if (len >= max) {
3090 /*
3091 * Okay someone managed to make a huge name, so he's ready to pay
3092 * for the processing speed.
3093 */
3094 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003095
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003096 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003097 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003098 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003099 return(NULL);
3100 }
3101 memcpy(buffer, buf, len);
3102 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3103 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003104 xmlChar *tmp;
3105
Owen Taylor3473f882001-02-23 17:55:21 +00003106 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003107 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003108 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003109 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00003110 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003111 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003112 return(NULL);
3113 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003114 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003115 }
3116 buffer[len++] = c;
3117 c = *cur++;
3118 }
3119 buffer[len] = 0;
3120 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003121
Daniel Veillard597bc482003-07-24 16:08:28 +00003122 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003123 if (buffer != NULL)
3124 xmlFree(buffer);
3125 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003126 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003127 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003128
Owen Taylor3473f882001-02-23 17:55:21 +00003129 if (buffer == NULL)
3130 ret = xmlStrndup(buf, len);
3131 else {
3132 ret = buffer;
3133 buffer = NULL;
3134 max = XML_MAX_NAMELEN;
3135 }
3136
3137
3138 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003139 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003140 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003141 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003142 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003143 }
Owen Taylor3473f882001-02-23 17:55:21 +00003144 len = 0;
3145
Daniel Veillardbb284f42002-10-16 18:02:47 +00003146 /*
3147 * Check that the first character is proper to start
3148 * a new name
3149 */
3150 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3151 ((c >= 0x41) && (c <= 0x5A)) ||
3152 (c == '_') || (c == ':'))) {
3153 int l;
3154 int first = CUR_SCHAR(cur, l);
3155
3156 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003157 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003158 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003159 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003160 }
3161 }
3162 cur++;
3163
Owen Taylor3473f882001-02-23 17:55:21 +00003164 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3165 buf[len++] = c;
3166 c = *cur++;
3167 }
3168 if (len >= max) {
3169 /*
3170 * Okay someone managed to make a huge name, so he's ready to pay
3171 * for the processing speed.
3172 */
3173 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003174
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003175 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003176 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003177 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003178 return(NULL);
3179 }
3180 memcpy(buffer, buf, len);
3181 while (c != 0) { /* tested bigname2.xml */
3182 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003183 xmlChar *tmp;
3184
Owen Taylor3473f882001-02-23 17:55:21 +00003185 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003186 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003187 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003188 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003189 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003190 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003191 return(NULL);
3192 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003193 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003194 }
3195 buffer[len++] = c;
3196 c = *cur++;
3197 }
3198 buffer[len] = 0;
3199 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003200
Owen Taylor3473f882001-02-23 17:55:21 +00003201 if (buffer == NULL)
3202 ret = xmlStrndup(buf, len);
3203 else {
3204 ret = buffer;
3205 }
3206 }
3207
3208 return(ret);
3209}
3210
3211/************************************************************************
3212 * *
3213 * The parser itself *
3214 * Relates to http://www.w3.org/TR/REC-xml *
3215 * *
3216 ************************************************************************/
3217
Daniel Veillard34e3f642008-07-29 09:02:27 +00003218/************************************************************************
3219 * *
3220 * Routines to parse Name, NCName and NmToken *
3221 * *
3222 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled when DEBUG
 * is defined (e.g. xmlParseName() increments nbParseName on each call).
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3231
Daniel Veillard34e3f642008-07-29 09:02:27 +00003232/*
3233 * The two following functions are related to the change of accepted
3234 * characters for Name and NmToken in the Revision 5 of XML-1.0
3235 * They correspond to the modified production [4] and the new production [4a]
3236 * changes in that revision. Also note that the macros used for the
3237 * productions Letter, Digit, CombiningChar and Extender are not needed
3238 * anymore.
3239 * We still keep compatibility to pre-revision5 parsing semantic if the
3240 * new XML_PARSE_OLD10 option is given to the parser.
3241 */
3242static int
3243xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3244 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245 /*
3246 * Use the new checks of production [4] [4a] amd [5] of the
3247 * Update 5 of XML-1.0
3248 */
3249 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3250 (((c >= 'a') && (c <= 'z')) ||
3251 ((c >= 'A') && (c <= 'Z')) ||
3252 (c == '_') || (c == ':') ||
3253 ((c >= 0xC0) && (c <= 0xD6)) ||
3254 ((c >= 0xD8) && (c <= 0xF6)) ||
3255 ((c >= 0xF8) && (c <= 0x2FF)) ||
3256 ((c >= 0x370) && (c <= 0x37D)) ||
3257 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258 ((c >= 0x200C) && (c <= 0x200D)) ||
3259 ((c >= 0x2070) && (c <= 0x218F)) ||
3260 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264 ((c >= 0x10000) && (c <= 0xEFFFF))))
3265 return(1);
3266 } else {
3267 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3268 return(1);
3269 }
3270 return(0);
3271}
3272
3273static int
3274xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3275 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3276 /*
3277 * Use the new checks of production [4] [4a] amd [5] of the
3278 * Update 5 of XML-1.0
3279 */
3280 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3281 (((c >= 'a') && (c <= 'z')) ||
3282 ((c >= 'A') && (c <= 'Z')) ||
3283 ((c >= '0') && (c <= '9')) || /* !start */
3284 (c == '_') || (c == ':') ||
3285 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3286 ((c >= 0xC0) && (c <= 0xD6)) ||
3287 ((c >= 0xD8) && (c <= 0xF6)) ||
3288 ((c >= 0xF8) && (c <= 0x2FF)) ||
3289 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3290 ((c >= 0x370) && (c <= 0x37D)) ||
3291 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3292 ((c >= 0x200C) && (c <= 0x200D)) ||
3293 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3294 ((c >= 0x2070) && (c <= 0x218F)) ||
3295 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3296 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3297 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3298 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3299 ((c >= 0x10000) && (c <= 0xEFFFF))))
3300 return(1);
3301 } else {
3302 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3303 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003304 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003305 (IS_COMBINING(c)) ||
3306 (IS_EXTENDER(c)))
3307 return(1);
3308 }
3309 return(0);
3310}
3311
Daniel Veillarde57ec792003-09-10 10:50:59 +00003312static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003313 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003314
Daniel Veillard34e3f642008-07-29 09:02:27 +00003315static const xmlChar *
3316xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3317 int len = 0, l;
3318 int c;
3319 int count = 0;
3320
Daniel Veillardc6561462009-03-25 10:22:31 +00003321#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003322 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003323#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003324
3325 /*
3326 * Handler for more complex cases
3327 */
3328 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003329 if (ctxt->instate == XML_PARSER_EOF)
3330 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003331 c = CUR_CHAR(l);
3332 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3333 /*
3334 * Use the new checks of production [4] [4a] amd [5] of the
3335 * Update 5 of XML-1.0
3336 */
3337 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3338 (!(((c >= 'a') && (c <= 'z')) ||
3339 ((c >= 'A') && (c <= 'Z')) ||
3340 (c == '_') || (c == ':') ||
3341 ((c >= 0xC0) && (c <= 0xD6)) ||
3342 ((c >= 0xD8) && (c <= 0xF6)) ||
3343 ((c >= 0xF8) && (c <= 0x2FF)) ||
3344 ((c >= 0x370) && (c <= 0x37D)) ||
3345 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3346 ((c >= 0x200C) && (c <= 0x200D)) ||
3347 ((c >= 0x2070) && (c <= 0x218F)) ||
3348 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3349 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3350 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3351 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3352 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3353 return(NULL);
3354 }
3355 len += l;
3356 NEXTL(l);
3357 c = CUR_CHAR(l);
3358 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3359 (((c >= 'a') && (c <= 'z')) ||
3360 ((c >= 'A') && (c <= 'Z')) ||
3361 ((c >= '0') && (c <= '9')) || /* !start */
3362 (c == '_') || (c == ':') ||
3363 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3364 ((c >= 0xC0) && (c <= 0xD6)) ||
3365 ((c >= 0xD8) && (c <= 0xF6)) ||
3366 ((c >= 0xF8) && (c <= 0x2FF)) ||
3367 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3368 ((c >= 0x370) && (c <= 0x37D)) ||
3369 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3370 ((c >= 0x200C) && (c <= 0x200D)) ||
3371 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3372 ((c >= 0x2070) && (c <= 0x218F)) ||
3373 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3374 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3375 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3376 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3377 ((c >= 0x10000) && (c <= 0xEFFFF))
3378 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003379 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003380 count = 0;
3381 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003382 if (ctxt->instate == XML_PARSER_EOF)
3383 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003384 }
3385 len += l;
3386 NEXTL(l);
3387 c = CUR_CHAR(l);
3388 }
3389 } else {
3390 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3391 (!IS_LETTER(c) && (c != '_') &&
3392 (c != ':'))) {
3393 return(NULL);
3394 }
3395 len += l;
3396 NEXTL(l);
3397 c = CUR_CHAR(l);
3398
3399 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3400 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3401 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003402 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003403 (IS_COMBINING(c)) ||
3404 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003405 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003406 count = 0;
3407 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003408 if (ctxt->instate == XML_PARSER_EOF)
3409 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003410 }
3411 len += l;
3412 NEXTL(l);
3413 c = CUR_CHAR(l);
3414 }
3415 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003416 if ((len > XML_MAX_NAME_LENGTH) &&
3417 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3418 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3419 return(NULL);
3420 }
Nick Wellnhofere2663052017-06-05 15:37:17 +02003421 if (ctxt->input->cur - ctxt->input->base < len) {
3422 /*
3423 * There were a couple of bugs where PERefs lead to to a change
3424 * of the buffer. Check the buffer size to avoid passing an invalid
3425 * pointer to xmlDictLookup.
3426 */
3427 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3428 "unexpected change of input buffer");
3429 return (NULL);
3430 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003431 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3432 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3433 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3434}
3435
Owen Taylor3473f882001-02-23 17:55:21 +00003436/**
3437 * xmlParseName:
3438 * @ctxt: an XML parser context
3439 *
3440 * parse an XML name.
3441 *
3442 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3443 * CombiningChar | Extender
3444 *
3445 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3446 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003447 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003448 *
3449 * Returns the Name parsed or NULL
3450 */
3451
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003452const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003453xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003454 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003455 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003456 int count = 0;
3457
3458 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003459
Daniel Veillardc6561462009-03-25 10:22:31 +00003460#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003461 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003462#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003463
Daniel Veillard48b2f892001-02-25 16:11:03 +00003464 /*
3465 * Accelerator for simple ASCII names
3466 */
3467 in = ctxt->input->cur;
3468 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3469 ((*in >= 0x41) && (*in <= 0x5A)) ||
3470 (*in == '_') || (*in == ':')) {
3471 in++;
3472 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3473 ((*in >= 0x41) && (*in <= 0x5A)) ||
3474 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003475 (*in == '_') || (*in == '-') ||
3476 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003477 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003478 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003479 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003480 if ((count > XML_MAX_NAME_LENGTH) &&
3481 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3482 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3483 return(NULL);
3484 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003485 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003486 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003487 ctxt->nbChars += count;
3488 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003489 if (ret == NULL)
3490 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003491 return(ret);
3492 }
3493 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003494 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003495 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003496}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003497
Daniel Veillard34e3f642008-07-29 09:02:27 +00003498static const xmlChar *
3499xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3500 int len = 0, l;
3501 int c;
3502 int count = 0;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003503 size_t startPosition = 0;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003504
Daniel Veillardc6561462009-03-25 10:22:31 +00003505#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003506 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003507#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003508
3509 /*
3510 * Handler for more complex cases
3511 */
3512 GROW;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003513 startPosition = CUR_PTR - BASE_PTR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003514 c = CUR_CHAR(l);
3515 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3516 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3517 return(NULL);
3518 }
3519
3520 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3521 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003522 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003523 if ((len > XML_MAX_NAME_LENGTH) &&
3524 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3525 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3526 return(NULL);
3527 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003528 count = 0;
3529 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003530 if (ctxt->instate == XML_PARSER_EOF)
3531 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003532 }
3533 len += l;
3534 NEXTL(l);
3535 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003536 if (c == 0) {
3537 count = 0;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003538 /*
3539 * when shrinking to extend the buffer we really need to preserve
3540 * the part of the name we already parsed. Hence rolling back
3541 * by current lenght.
3542 */
3543 ctxt->input->cur -= l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003544 GROW;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003545 ctxt->input->cur += l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003546 if (ctxt->instate == XML_PARSER_EOF)
3547 return(NULL);
3548 c = CUR_CHAR(l);
3549 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003550 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003551 if ((len > XML_MAX_NAME_LENGTH) &&
3552 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3553 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3554 return(NULL);
3555 }
Pranjal Jumde45752d22016-03-03 11:50:34 -08003556 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003557}
3558
3559/**
3560 * xmlParseNCName:
3561 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003562 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003563 *
3564 * parse an XML name.
3565 *
3566 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3567 * CombiningChar | Extender
3568 *
3569 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3570 *
3571 * Returns the Name parsed or NULL
3572 */
3573
3574static const xmlChar *
3575xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003576 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003577 const xmlChar *ret;
3578 int count = 0;
3579
Daniel Veillardc6561462009-03-25 10:22:31 +00003580#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003581 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003582#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003583
3584 /*
3585 * Accelerator for simple ASCII names
3586 */
3587 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003588 e = ctxt->input->end;
3589 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3590 ((*in >= 0x41) && (*in <= 0x5A)) ||
3591 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003592 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003593 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3594 ((*in >= 0x41) && (*in <= 0x5A)) ||
3595 ((*in >= 0x30) && (*in <= 0x39)) ||
3596 (*in == '_') || (*in == '-') ||
3597 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003598 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003599 if (in >= e)
3600 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003601 if ((*in > 0) && (*in < 0x80)) {
3602 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003603 if ((count > XML_MAX_NAME_LENGTH) &&
3604 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3605 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3606 return(NULL);
3607 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003608 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3609 ctxt->input->cur = in;
3610 ctxt->nbChars += count;
3611 ctxt->input->col += count;
3612 if (ret == NULL) {
3613 xmlErrMemory(ctxt, NULL);
3614 }
3615 return(ret);
3616 }
3617 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003618complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003619 return(xmlParseNCNameComplex(ctxt));
3620}
3621
Daniel Veillard46de64e2002-05-29 08:21:33 +00003622/**
3623 * xmlParseNameAndCompare:
3624 * @ctxt: an XML parser context
3625 *
3626 * parse an XML name and compares for match
3627 * (specialized for endtag parsing)
3628 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003629 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3630 * and the name for mismatch
3631 */
3632
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003633static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003634xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003635 register const xmlChar *cmp = other;
3636 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003637 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003638
3639 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003640 if (ctxt->instate == XML_PARSER_EOF)
3641 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003642
Daniel Veillard46de64e2002-05-29 08:21:33 +00003643 in = ctxt->input->cur;
3644 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003645 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003646 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003647 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003648 }
William M. Brack76e95df2003-10-18 16:20:14 +00003649 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003650 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003651 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003652 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003653 }
3654 /* failure (or end of input buffer), check with full function */
3655 ret = xmlParseName (ctxt);
Jan Pokornýbb654fe2016-04-13 16:56:07 +02003656 /* strings coming from the dictionary direct compare possible */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003657 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003658 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003659 }
3660 return ret;
3661}
3662
Owen Taylor3473f882001-02-23 17:55:21 +00003663/**
3664 * xmlParseStringName:
3665 * @ctxt: an XML parser context
3666 * @str: a pointer to the string pointer (IN/OUT)
3667 *
3668 * parse an XML name.
3669 *
3670 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3671 * CombiningChar | Extender
3672 *
3673 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3674 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003675 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003676 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003677 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003678 * is updated to the current location in the string.
3679 */
3680
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003681static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003682xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3683 xmlChar buf[XML_MAX_NAMELEN + 5];
3684 const xmlChar *cur = *str;
3685 int len = 0, l;
3686 int c;
3687
Daniel Veillardc6561462009-03-25 10:22:31 +00003688#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003689 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003690#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003691
Owen Taylor3473f882001-02-23 17:55:21 +00003692 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003693 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003694 return(NULL);
3695 }
3696
Daniel Veillard34e3f642008-07-29 09:02:27 +00003697 COPY_BUF(l,buf,len,c);
3698 cur += l;
3699 c = CUR_SCHAR(cur, l);
3700 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003701 COPY_BUF(l,buf,len,c);
3702 cur += l;
3703 c = CUR_SCHAR(cur, l);
3704 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3705 /*
3706 * Okay someone managed to make a huge name, so he's ready to pay
3707 * for the processing speed.
3708 */
3709 xmlChar *buffer;
3710 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003711
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003712 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003713 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003714 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003715 return(NULL);
3716 }
3717 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003718 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003719 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003720 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003721
3722 if ((len > XML_MAX_NAME_LENGTH) &&
3723 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3724 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3725 xmlFree(buffer);
3726 return(NULL);
3727 }
Owen Taylor3473f882001-02-23 17:55:21 +00003728 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003729 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003730 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003731 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003732 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003733 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003734 return(NULL);
3735 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003736 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003737 }
3738 COPY_BUF(l,buffer,len,c);
3739 cur += l;
3740 c = CUR_SCHAR(cur, l);
3741 }
3742 buffer[len] = 0;
3743 *str = cur;
3744 return(buffer);
3745 }
3746 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003747 if ((len > XML_MAX_NAME_LENGTH) &&
3748 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3749 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3750 return(NULL);
3751 }
Owen Taylor3473f882001-02-23 17:55:21 +00003752 *str = cur;
3753 return(xmlStrndup(buf, len));
3754}
3755
3756/**
3757 * xmlParseNmtoken:
3758 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003759 *
Owen Taylor3473f882001-02-23 17:55:21 +00003760 * parse an XML Nmtoken.
3761 *
3762 * [7] Nmtoken ::= (NameChar)+
3763 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003764 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003765 *
3766 * Returns the Nmtoken parsed or NULL
3767 */
3768
3769xmlChar *
3770xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3771 xmlChar buf[XML_MAX_NAMELEN + 5];
3772 int len = 0, l;
3773 int c;
3774 int count = 0;
3775
Daniel Veillardc6561462009-03-25 10:22:31 +00003776#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003777 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003778#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003779
Owen Taylor3473f882001-02-23 17:55:21 +00003780 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003781 if (ctxt->instate == XML_PARSER_EOF)
3782 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003783 c = CUR_CHAR(l);
3784
Daniel Veillard34e3f642008-07-29 09:02:27 +00003785 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003786 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003787 count = 0;
3788 GROW;
3789 }
3790 COPY_BUF(l,buf,len,c);
3791 NEXTL(l);
3792 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003793 if (c == 0) {
3794 count = 0;
3795 GROW;
3796 if (ctxt->instate == XML_PARSER_EOF)
3797 return(NULL);
3798 c = CUR_CHAR(l);
3799 }
Owen Taylor3473f882001-02-23 17:55:21 +00003800 if (len >= XML_MAX_NAMELEN) {
3801 /*
3802 * Okay someone managed to make a huge token, so he's ready to pay
3803 * for the processing speed.
3804 */
3805 xmlChar *buffer;
3806 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003807
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003808 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003809 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003810 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003811 return(NULL);
3812 }
3813 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003814 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003815 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003816 count = 0;
3817 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003818 if (ctxt->instate == XML_PARSER_EOF) {
3819 xmlFree(buffer);
3820 return(NULL);
3821 }
Owen Taylor3473f882001-02-23 17:55:21 +00003822 }
3823 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003824 xmlChar *tmp;
3825
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003826 if ((max > XML_MAX_NAME_LENGTH) &&
3827 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3828 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3829 xmlFree(buffer);
3830 return(NULL);
3831 }
Owen Taylor3473f882001-02-23 17:55:21 +00003832 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003833 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003834 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003835 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003836 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003837 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003838 return(NULL);
3839 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003840 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003841 }
3842 COPY_BUF(l,buffer,len,c);
3843 NEXTL(l);
3844 c = CUR_CHAR(l);
3845 }
3846 buffer[len] = 0;
3847 return(buffer);
3848 }
3849 }
3850 if (len == 0)
3851 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003852 if ((len > XML_MAX_NAME_LENGTH) &&
3853 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3854 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3855 return(NULL);
3856 }
Owen Taylor3473f882001-02-23 17:55:21 +00003857 return(xmlStrndup(buf, len));
3858}
3859
3860/**
3861 * xmlParseEntityValue:
3862 * @ctxt: an XML parser context
3863 * @orig: if non-NULL store a copy of the original entity value
3864 *
3865 * parse a value for ENTITY declarations
3866 *
3867 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3868 * "'" ([^%&'] | PEReference | Reference)* "'"
3869 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003870 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003871 */
3872
3873xmlChar *
3874xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3875 xmlChar *buf = NULL;
3876 int len = 0;
3877 int size = XML_PARSER_BUFFER_SIZE;
3878 int c, l;
3879 xmlChar stop;
3880 xmlChar *ret = NULL;
3881 const xmlChar *cur = NULL;
3882 xmlParserInputPtr input;
3883
3884 if (RAW == '"') stop = '"';
3885 else if (RAW == '\'') stop = '\'';
3886 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003887 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003888 return(NULL);
3889 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003890 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003891 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003892 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003893 return(NULL);
3894 }
3895
3896 /*
3897 * The content of the entity definition is copied in a buffer.
3898 */
3899
3900 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3901 input = ctxt->input;
3902 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003903 if (ctxt->instate == XML_PARSER_EOF) {
3904 xmlFree(buf);
3905 return(NULL);
3906 }
Owen Taylor3473f882001-02-23 17:55:21 +00003907 NEXT;
3908 c = CUR_CHAR(l);
3909 /*
3910 * NOTE: 4.4.5 Included in Literal
3911 * When a parameter entity reference appears in a literal entity
3912 * value, ... a single or double quote character in the replacement
3913 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003914 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003915 * In practice it means we stop the loop only when back at parsing
3916 * the initial entity and the quote is found
3917 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003918 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3919 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003920 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003921 xmlChar *tmp;
3922
Owen Taylor3473f882001-02-23 17:55:21 +00003923 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003924 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3925 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003926 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003927 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003928 return(NULL);
3929 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003930 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003931 }
3932 COPY_BUF(l,buf,len,c);
3933 NEXTL(l);
3934 /*
3935 * Pop-up of finished entities.
3936 */
3937 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3938 xmlPopInput(ctxt);
3939
3940 GROW;
3941 c = CUR_CHAR(l);
3942 if (c == 0) {
3943 GROW;
3944 c = CUR_CHAR(l);
3945 }
3946 }
3947 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003948 if (ctxt->instate == XML_PARSER_EOF) {
3949 xmlFree(buf);
3950 return(NULL);
3951 }
Owen Taylor3473f882001-02-23 17:55:21 +00003952
3953 /*
3954 * Raise problem w.r.t. '&' and '%' being used in non-entities
3955 * reference constructs. Note Charref will be handled in
3956 * xmlStringDecodeEntities()
3957 */
3958 cur = buf;
3959 while (*cur != 0) { /* non input consuming */
3960 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3961 xmlChar *name;
3962 xmlChar tmp = *cur;
3963
3964 cur++;
3965 name = xmlParseStringName(ctxt, &cur);
3966 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003967 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003968 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003969 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003970 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003971 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3972 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003973 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003974 }
3975 if (name != NULL)
3976 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003977 if (*cur == 0)
3978 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003979 }
3980 cur++;
3981 }
3982
3983 /*
3984 * Then PEReference entities are substituted.
3985 */
3986 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003987 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003988 xmlFree(buf);
3989 } else {
3990 NEXT;
3991 /*
3992 * NOTE: 4.4.7 Bypassed
3993 * When a general entity reference appears in the EntityValue in
3994 * an entity declaration, it is bypassed and left as is.
3995 * so XML_SUBSTITUTE_REF is not set here.
3996 */
Peter Simons8f30bdf2016-04-15 11:56:55 +02003997 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003998 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3999 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004000 --ctxt->depth;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004001 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00004002 *orig = buf;
4003 else
4004 xmlFree(buf);
4005 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004006
Owen Taylor3473f882001-02-23 17:55:21 +00004007 return(ret);
4008}
4009
4010/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00004011 * xmlParseAttValueComplex:
4012 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00004013 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004014 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00004015 *
4016 * parse a value for an attribute, this is the fallback function
4017 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004018 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00004019 *
4020 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4021 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00004022static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004023xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00004024 xmlChar limit = 0;
4025 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004026 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004027 size_t len = 0;
4028 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004029 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004030 xmlChar *current = NULL;
4031 xmlEntityPtr ent;
4032
Owen Taylor3473f882001-02-23 17:55:21 +00004033 if (NXT(0) == '"') {
4034 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4035 limit = '"';
4036 NEXT;
4037 } else if (NXT(0) == '\'') {
4038 limit = '\'';
4039 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4040 NEXT;
4041 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004042 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004043 return(NULL);
4044 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00004045
Owen Taylor3473f882001-02-23 17:55:21 +00004046 /*
4047 * allocate a translation buffer.
4048 */
4049 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004050 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004051 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00004052
4053 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004054 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00004055 */
4056 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004057 while (((NXT(0) != limit) && /* checked */
4058 (IS_CHAR(c)) && (c != '<')) &&
4059 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08004060 /*
4061 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
4062 * special option is given
4063 */
4064 if ((len > XML_MAX_TEXT_LENGTH) &&
4065 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4066 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004067 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08004068 goto mem_error;
4069 }
Owen Taylor3473f882001-02-23 17:55:21 +00004070 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00004071 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00004072 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004073 if (NXT(1) == '#') {
4074 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004075
Owen Taylor3473f882001-02-23 17:55:21 +00004076 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00004077 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004078 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004079 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00004080 }
4081 buf[len++] = '&';
4082 } else {
4083 /*
4084 * The reparsing will be done in xmlStringGetNodeList()
4085 * called by the attribute() function in SAX.c
4086 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08004087 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004088 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00004089 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004090 buf[len++] = '&';
4091 buf[len++] = '#';
4092 buf[len++] = '3';
4093 buf[len++] = '8';
4094 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00004095 }
Daniel Veillarddc171602008-03-26 17:41:38 +00004096 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004097 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004098 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004099 }
Owen Taylor3473f882001-02-23 17:55:21 +00004100 len += xmlCopyChar(0, &buf[len], val);
4101 }
4102 } else {
4103 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00004104 ctxt->nbentities++;
4105 if (ent != NULL)
4106 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004107 if ((ent != NULL) &&
4108 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004109 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004110 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004111 }
4112 if ((ctxt->replaceEntities == 0) &&
4113 (ent->content[0] == '&')) {
4114 buf[len++] = '&';
4115 buf[len++] = '#';
4116 buf[len++] = '3';
4117 buf[len++] = '8';
4118 buf[len++] = ';';
4119 } else {
4120 buf[len++] = ent->content[0];
4121 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004122 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004123 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004124 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
Peter Simons8f30bdf2016-04-15 11:56:55 +02004125 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004126 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004127 XML_SUBSTITUTE_REF,
4128 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004129 --ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004130 if (rep != NULL) {
4131 current = rep;
4132 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004133 if ((*current == 0xD) || (*current == 0xA) ||
4134 (*current == 0x9)) {
4135 buf[len++] = 0x20;
4136 current++;
4137 } else
4138 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004139 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004140 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004141 }
4142 }
4143 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004144 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004145 }
4146 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004147 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004148 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004149 }
Owen Taylor3473f882001-02-23 17:55:21 +00004150 if (ent->content != NULL)
4151 buf[len++] = ent->content[0];
4152 }
4153 } else if (ent != NULL) {
4154 int i = xmlStrlen(ent->name);
4155 const xmlChar *cur = ent->name;
4156
4157 /*
4158 * This may look absurd but is needed to detect
4159 * entities problems
4160 */
4161 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004162 (ent->content != NULL) && (ent->checked == 0)) {
4163 unsigned long oldnbent = ctxt->nbentities;
4164
Peter Simons8f30bdf2016-04-15 11:56:55 +02004165 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004166 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004167 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004168 --ctxt->depth;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004169
Daniel Veillardcff25462013-03-11 15:57:55 +08004170 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004171 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004172 if (xmlStrchr(rep, '<'))
4173 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004174 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004175 rep = NULL;
4176 }
Owen Taylor3473f882001-02-23 17:55:21 +00004177 }
4178
4179 /*
4180 * Just output the reference
4181 */
4182 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004183 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004184 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004185 }
4186 for (;i > 0;i--)
4187 buf[len++] = *cur++;
4188 buf[len++] = ';';
4189 }
4190 }
4191 } else {
4192 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004193 if ((len != 0) || (!normalize)) {
4194 if ((!normalize) || (!in_space)) {
4195 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004196 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004197 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004198 }
4199 }
4200 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004201 }
4202 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004203 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004204 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004205 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004206 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004207 }
4208 }
4209 NEXTL(l);
4210 }
4211 GROW;
4212 c = CUR_CHAR(l);
4213 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004214 if (ctxt->instate == XML_PARSER_EOF)
4215 goto error;
4216
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004217 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004218 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004219 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004220 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004221 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004222 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004223 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004224 if ((c != 0) && (!IS_CHAR(c))) {
4225 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4226 "invalid character in attribute value\n");
4227 } else {
4228 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4229 "AttValue: ' expected\n");
4230 }
Owen Taylor3473f882001-02-23 17:55:21 +00004231 } else
4232 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004233
4234 /*
4235 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004236 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004237 */
4238 if (len >= INT_MAX) {
4239 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004240 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004241 goto mem_error;
4242 }
4243
4244 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004245 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004246
4247mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004248 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004249error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004250 if (buf != NULL)
4251 xmlFree(buf);
4252 if (rep != NULL)
4253 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004254 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004255}
4256
4257/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004258 * xmlParseAttValue:
4259 * @ctxt: an XML parser context
4260 *
4261 * parse a value for an attribute
4262 * Note: the parser won't do substitution of entities here, this
4263 * will be handled later in xmlStringGetNodeList
4264 *
4265 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4266 * "'" ([^<&'] | Reference)* "'"
4267 *
4268 * 3.3.3 Attribute-Value Normalization:
4269 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004270 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004271 * - a character reference is processed by appending the referenced
4272 * character to the attribute value
4273 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004274 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004275 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4276 * appending #x20 to the normalized value, except that only a single
4277 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004278 * parsed entity or the literal entity value of an internal parsed entity
4279 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004280 * If the declared value is not CDATA, then the XML processor must further
4281 * process the normalized attribute value by discarding any leading and
4282 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004283 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004284 * All attributes for which no declaration has been read should be treated
4285 * by a non-validating parser as if declared CDATA.
4286 *
4287 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4288 */
4289
4290
4291xmlChar *
4292xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004293 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004294 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004295}
4296
4297/**
Owen Taylor3473f882001-02-23 17:55:21 +00004298 * xmlParseSystemLiteral:
4299 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004300 *
Owen Taylor3473f882001-02-23 17:55:21 +00004301 * parse an XML Literal
4302 *
4303 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4304 *
4305 * Returns the SystemLiteral parsed or NULL
4306 */
4307
4308xmlChar *
4309xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4310 xmlChar *buf = NULL;
4311 int len = 0;
4312 int size = XML_PARSER_BUFFER_SIZE;
4313 int cur, l;
4314 xmlChar stop;
4315 int state = ctxt->instate;
4316 int count = 0;
4317
4318 SHRINK;
4319 if (RAW == '"') {
4320 NEXT;
4321 stop = '"';
4322 } else if (RAW == '\'') {
4323 NEXT;
4324 stop = '\'';
4325 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004326 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004327 return(NULL);
4328 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004329
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004330 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004331 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004332 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004333 return(NULL);
4334 }
4335 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4336 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004337 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004338 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004339 xmlChar *tmp;
4340
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004341 if ((size > XML_MAX_NAME_LENGTH) &&
4342 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4343 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4344 xmlFree(buf);
4345 ctxt->instate = (xmlParserInputState) state;
4346 return(NULL);
4347 }
Owen Taylor3473f882001-02-23 17:55:21 +00004348 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004349 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4350 if (tmp == NULL) {
4351 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004352 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004353 ctxt->instate = (xmlParserInputState) state;
4354 return(NULL);
4355 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004356 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004357 }
4358 count++;
4359 if (count > 50) {
4360 GROW;
4361 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004362 if (ctxt->instate == XML_PARSER_EOF) {
4363 xmlFree(buf);
4364 return(NULL);
4365 }
Owen Taylor3473f882001-02-23 17:55:21 +00004366 }
4367 COPY_BUF(l,buf,len,cur);
4368 NEXTL(l);
4369 cur = CUR_CHAR(l);
4370 if (cur == 0) {
4371 GROW;
4372 SHRINK;
4373 cur = CUR_CHAR(l);
4374 }
4375 }
4376 buf[len] = 0;
4377 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004378 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004379 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004380 } else {
4381 NEXT;
4382 }
4383 return(buf);
4384}
4385
4386/**
4387 * xmlParsePubidLiteral:
4388 * @ctxt: an XML parser context
4389 *
4390 * parse an XML public literal
4391 *
4392 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4393 *
4394 * Returns the PubidLiteral parsed or NULL.
4395 */
4396
4397xmlChar *
4398xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4399 xmlChar *buf = NULL;
4400 int len = 0;
4401 int size = XML_PARSER_BUFFER_SIZE;
4402 xmlChar cur;
4403 xmlChar stop;
4404 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004405 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004406
4407 SHRINK;
4408 if (RAW == '"') {
4409 NEXT;
4410 stop = '"';
4411 } else if (RAW == '\'') {
4412 NEXT;
4413 stop = '\'';
4414 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004415 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004416 return(NULL);
4417 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004418 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004419 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004420 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004421 return(NULL);
4422 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004423 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004424 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004425 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004426 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004427 xmlChar *tmp;
4428
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004429 if ((size > XML_MAX_NAME_LENGTH) &&
4430 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4431 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4432 xmlFree(buf);
4433 return(NULL);
4434 }
Owen Taylor3473f882001-02-23 17:55:21 +00004435 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004436 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4437 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004438 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004439 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004440 return(NULL);
4441 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004442 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004443 }
4444 buf[len++] = cur;
4445 count++;
4446 if (count > 50) {
4447 GROW;
4448 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004449 if (ctxt->instate == XML_PARSER_EOF) {
4450 xmlFree(buf);
4451 return(NULL);
4452 }
Owen Taylor3473f882001-02-23 17:55:21 +00004453 }
4454 NEXT;
4455 cur = CUR;
4456 if (cur == 0) {
4457 GROW;
4458 SHRINK;
4459 cur = CUR;
4460 }
4461 }
4462 buf[len] = 0;
4463 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004464 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004465 } else {
4466 NEXT;
4467 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004468 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004469 return(buf);
4470}
4471
Daniel Veillard8ed10722009-08-20 19:17:36 +02004472static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004473
/*
 * Lookup table used by the inner loop of the accelerated character data
 * scan in xmlParseCharData() (while (test_char_data[*in]) ...).
 *
 * An entry is non-zero (it holds the byte's own value) for the bytes the
 * loop may step over: TAB (0x09) and 0x20..0x7F except '&' (0x26),
 * '<' (0x3C) and ']' (0x5D). Every other byte maps to 0 and stops the
 * loop: the remaining control characters, including LF (0x0A) and
 * CR (0x0D), and all bytes >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB accepted; LF and CR stop the loop */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* '&' (0x26) stops the loop */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* '<' (0x3C) stops the loop */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ']' (0x5D) stops the loop */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ASCII: 0x80..0xFF all stop the loop */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4511
Owen Taylor3473f882001-02-23 17:55:21 +00004512/**
4513 * xmlParseCharData:
4514 * @ctxt: an XML parser context
4515 * @cdata: int indicating whether we are within a CDATA section
4516 *
4517 * parse a CharData section.
4518 * if we are within a CDATA section ']]>' marks an end of section.
4519 *
4520 * The right angle bracket (>) may be represented using the string "&gt;",
4521 * and must, for compatibility, be escaped using "&gt;" or a character
4522 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004523 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004524 *
4525 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4526 */
4527
4528void
4529xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004530 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004531 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004532 int line = ctxt->input->line;
4533 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004534 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004535
4536 SHRINK;
4537 GROW;
4538 /*
4539 * Accelerated common case where input don't need to be
4540 * modified before passing it to the handler.
4541 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004542 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004543 in = ctxt->input->cur;
4544 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004545get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004546 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004547 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004548 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004549 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004550 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004551 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004552 goto get_more_space;
4553 }
4554 if (*in == '<') {
4555 nbchar = in - ctxt->input->cur;
4556 if (nbchar > 0) {
4557 const xmlChar *tmp = ctxt->input->cur;
4558 ctxt->input->cur = in;
4559
Daniel Veillard34099b42004-11-04 17:34:35 +00004560 if ((ctxt->sax != NULL) &&
4561 (ctxt->sax->ignorableWhitespace !=
4562 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004563 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004564 if (ctxt->sax->ignorableWhitespace != NULL)
4565 ctxt->sax->ignorableWhitespace(ctxt->userData,
4566 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004567 } else {
4568 if (ctxt->sax->characters != NULL)
4569 ctxt->sax->characters(ctxt->userData,
4570 tmp, nbchar);
4571 if (*ctxt->space == -1)
4572 *ctxt->space = -2;
4573 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004574 } else if ((ctxt->sax != NULL) &&
4575 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004576 ctxt->sax->characters(ctxt->userData,
4577 tmp, nbchar);
4578 }
4579 }
4580 return;
4581 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004582
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004583get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004584 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004585 while (test_char_data[*in]) {
4586 in++;
4587 ccol++;
4588 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004589 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004590 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004591 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004592 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004593 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004594 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004595 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004596 }
4597 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004598 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004599 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004600 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004601 return;
4602 }
4603 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004604 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004605 goto get_more;
4606 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004607 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004608 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004609 if ((ctxt->sax != NULL) &&
4610 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004611 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004612 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004613 const xmlChar *tmp = ctxt->input->cur;
4614 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004615
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004616 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004617 if (ctxt->sax->ignorableWhitespace != NULL)
4618 ctxt->sax->ignorableWhitespace(ctxt->userData,
4619 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004620 } else {
4621 if (ctxt->sax->characters != NULL)
4622 ctxt->sax->characters(ctxt->userData,
4623 tmp, nbchar);
4624 if (*ctxt->space == -1)
4625 *ctxt->space = -2;
4626 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004627 line = ctxt->input->line;
4628 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004629 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004630 if (ctxt->sax->characters != NULL)
4631 ctxt->sax->characters(ctxt->userData,
4632 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004633 line = ctxt->input->line;
4634 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004635 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004636 /* something really bad happened in the SAX callback */
4637 if (ctxt->instate != XML_PARSER_CONTENT)
4638 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004639 }
4640 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004641 if (*in == 0xD) {
4642 in++;
4643 if (*in == 0xA) {
4644 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004645 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004646 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004647 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004648 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004649 in--;
4650 }
4651 if (*in == '<') {
4652 return;
4653 }
4654 if (*in == '&') {
4655 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004656 }
4657 SHRINK;
4658 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004659 if (ctxt->instate == XML_PARSER_EOF)
4660 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004661 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004662 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004663 nbchar = 0;
4664 }
Daniel Veillard50582112001-03-26 22:52:16 +00004665 ctxt->input->line = line;
4666 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004667 xmlParseCharDataComplex(ctxt, cdata);
4668}
4669
Daniel Veillard01c13b52002-12-10 15:19:08 +00004670/**
4671 * xmlParseCharDataComplex:
4672 * @ctxt: an XML parser context
4673 * @cdata: int indicating whether we are within a CDATA section
4674 *
4675 * parse a CharData section.this is the fallback function
4676 * of xmlParseCharData() when the parsing requires handling
4677 * of non-ASCII characters.
4678 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004679static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004680xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004681 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4682 int nbchar = 0;
4683 int cur, l;
4684 int count = 0;
4685
4686 SHRINK;
4687 GROW;
4688 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004689 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004690 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004691 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004692 if ((cur == ']') && (NXT(1) == ']') &&
4693 (NXT(2) == '>')) {
4694 if (cdata) break;
4695 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004696 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004697 }
4698 }
4699 COPY_BUF(l,buf,nbchar,cur);
4700 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004701 buf[nbchar] = 0;
4702
Owen Taylor3473f882001-02-23 17:55:21 +00004703 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004704 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004705 */
4706 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004707 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004708 if (ctxt->sax->ignorableWhitespace != NULL)
4709 ctxt->sax->ignorableWhitespace(ctxt->userData,
4710 buf, nbchar);
4711 } else {
4712 if (ctxt->sax->characters != NULL)
4713 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004714 if ((ctxt->sax->characters !=
4715 ctxt->sax->ignorableWhitespace) &&
4716 (*ctxt->space == -1))
4717 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004718 }
4719 }
4720 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004721 /* something really bad happened in the SAX callback */
4722 if (ctxt->instate != XML_PARSER_CONTENT)
4723 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004724 }
4725 count++;
4726 if (count > 50) {
4727 GROW;
4728 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004729 if (ctxt->instate == XML_PARSER_EOF)
4730 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004731 }
4732 NEXTL(l);
4733 cur = CUR_CHAR(l);
4734 }
4735 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004736 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004737 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004738 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004739 */
4740 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004741 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004742 if (ctxt->sax->ignorableWhitespace != NULL)
4743 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4744 } else {
4745 if (ctxt->sax->characters != NULL)
4746 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004747 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4748 (*ctxt->space == -1))
4749 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004750 }
4751 }
4752 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004753 if ((cur != 0) && (!IS_CHAR(cur))) {
4754 /* Generate the error and skip the offending character */
4755 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4756 "PCDATA invalid Char value %d\n",
4757 cur);
4758 NEXTL(l);
4759 }
Owen Taylor3473f882001-02-23 17:55:21 +00004760}
4761
4762/**
4763 * xmlParseExternalID:
4764 * @ctxt: an XML parser context
4765 * @publicID: a xmlChar** receiving PubidLiteral
4766 * @strict: indicate whether we should restrict parsing to only
4767 * production [75], see NOTE below
4768 *
4769 * Parse an External ID or a Public ID
4770 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004771 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004772 * 'PUBLIC' S PubidLiteral S SystemLiteral
4773 *
4774 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4775 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4776 *
4777 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4778 *
4779 * Returns the function returns SystemLiteral and in the second
4780 * case publicID receives PubidLiteral, is strict is off
4781 * it is possible to return NULL and have publicID set.
4782 */
4783
4784xmlChar *
4785xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4786 xmlChar *URI = NULL;
4787
4788 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004789
4790 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004791 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004792 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004793 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004794 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4795 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004796 }
4797 SKIP_BLANKS;
4798 URI = xmlParseSystemLiteral(ctxt);
4799 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004800 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004801 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004802 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004803 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004804 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004805 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004806 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004807 }
4808 SKIP_BLANKS;
4809 *publicID = xmlParsePubidLiteral(ctxt);
4810 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004811 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004812 }
4813 if (strict) {
4814 /*
4815 * We don't handle [83] so "S SystemLiteral" is required.
4816 */
William M. Brack76e95df2003-10-18 16:20:14 +00004817 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004818 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004819 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004820 }
4821 } else {
4822 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004823 * We handle [83] so we return immediately, if
Owen Taylor3473f882001-02-23 17:55:21 +00004824 * "S SystemLiteral" is not detected. From a purely parsing
4825 * point of view that's a nice mess.
4826 */
4827 const xmlChar *ptr;
4828 GROW;
4829
4830 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004831 if (!IS_BLANK_CH(*ptr)) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004832
William M. Brack76e95df2003-10-18 16:20:14 +00004833 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004834 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4835 }
4836 SKIP_BLANKS;
4837 URI = xmlParseSystemLiteral(ctxt);
4838 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004839 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004840 }
4841 }
4842 return(URI);
4843}
4844
4845/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004846 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004847 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004848 * @buf: the already parsed part of the buffer
4849 * @len: number of bytes filles in the buffer
4850 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004851 *
4852 * Skip an XML (SGML) comment <!-- .... -->
4853 * The spec says that "For compatibility, the string "--" (double-hyphen)
4854 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004855 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004856 *
4857 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4858 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004859static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004860xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4861 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004862 int q, ql;
4863 int r, rl;
4864 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004865 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004866 int inputid;
4867
4868 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004869
Owen Taylor3473f882001-02-23 17:55:21 +00004870 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004871 len = 0;
4872 size = XML_PARSER_BUFFER_SIZE;
4873 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4874 if (buf == NULL) {
4875 xmlErrMemory(ctxt, NULL);
4876 return;
4877 }
Owen Taylor3473f882001-02-23 17:55:21 +00004878 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004879 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004880 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004881 if (q == 0)
4882 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004883 if (!IS_CHAR(q)) {
4884 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4885 "xmlParseComment: invalid xmlChar value %d\n",
4886 q);
4887 xmlFree (buf);
4888 return;
4889 }
Owen Taylor3473f882001-02-23 17:55:21 +00004890 NEXTL(ql);
4891 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004892 if (r == 0)
4893 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004894 if (!IS_CHAR(r)) {
4895 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4896 "xmlParseComment: invalid xmlChar value %d\n",
4897 q);
4898 xmlFree (buf);
4899 return;
4900 }
Owen Taylor3473f882001-02-23 17:55:21 +00004901 NEXTL(rl);
4902 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004903 if (cur == 0)
4904 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004905 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004906 ((cur != '>') ||
4907 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004908 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004909 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004910 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004911 if ((len > XML_MAX_TEXT_LENGTH) &&
4912 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4913 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4914 "Comment too big found", NULL);
4915 xmlFree (buf);
4916 return;
4917 }
Owen Taylor3473f882001-02-23 17:55:21 +00004918 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004919 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004920 size_t new_size;
4921
4922 new_size = size * 2;
4923 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004924 if (new_buf == NULL) {
4925 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004926 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004927 return;
4928 }
William M. Bracka3215c72004-07-31 16:24:01 +00004929 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004930 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004931 }
4932 COPY_BUF(ql,buf,len,q);
4933 q = r;
4934 ql = rl;
4935 r = cur;
4936 rl = l;
4937
4938 count++;
4939 if (count > 50) {
4940 GROW;
4941 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004942 if (ctxt->instate == XML_PARSER_EOF) {
4943 xmlFree(buf);
4944 return;
4945 }
Owen Taylor3473f882001-02-23 17:55:21 +00004946 }
4947 NEXTL(l);
4948 cur = CUR_CHAR(l);
4949 if (cur == 0) {
4950 SHRINK;
4951 GROW;
4952 cur = CUR_CHAR(l);
4953 }
4954 }
4955 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004956 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004957 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004958 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004959 } else if (!IS_CHAR(cur)) {
4960 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4961 "xmlParseComment: invalid xmlChar value %d\n",
4962 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004963 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004964 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004965 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4966 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004967 }
4968 NEXT;
4969 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4970 (!ctxt->disableSAX))
4971 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004972 }
Daniel Veillardda629342007-08-01 07:49:06 +00004973 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004974 return;
4975not_terminated:
4976 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4977 "Comment not terminated\n", NULL);
4978 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004979 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004980}
Daniel Veillardda629342007-08-01 07:49:06 +00004981
Daniel Veillard4c778d82005-01-23 17:37:44 +00004982/**
4983 * xmlParseComment:
4984 * @ctxt: an XML parser context
4985 *
4986 * Skip an XML (SGML) comment <!-- .... -->
4987 * The spec says that "For compatibility, the string "--" (double-hyphen)
4988 * must not occur within comments. "
4989 *
4990 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4991 */
4992void
4993xmlParseComment(xmlParserCtxtPtr ctxt) {
4994 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004995 size_t size = XML_PARSER_BUFFER_SIZE;
4996 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004997 xmlParserInputState state;
4998 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004999 size_t nbchar = 0;
5000 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00005001 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005002
5003 /*
5004 * Check that there is a comment right here.
5005 */
5006 if ((RAW != '<') || (NXT(1) != '!') ||
5007 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005008 state = ctxt->instate;
5009 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00005010 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005011 SKIP(4);
5012 SHRINK;
5013 GROW;
5014
5015 /*
5016 * Accelerated common case where input don't need to be
5017 * modified before passing it to the handler.
5018 */
5019 in = ctxt->input->cur;
5020 do {
5021 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005022 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00005023 ctxt->input->line++; ctxt->input->col = 1;
5024 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005025 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005026 }
5027get_more:
5028 ccol = ctxt->input->col;
5029 while (((*in > '-') && (*in <= 0x7F)) ||
5030 ((*in >= 0x20) && (*in < '-')) ||
5031 (*in == 0x09)) {
5032 in++;
5033 ccol++;
5034 }
5035 ctxt->input->col = ccol;
5036 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005037 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00005038 ctxt->input->line++; ctxt->input->col = 1;
5039 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005040 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005041 goto get_more;
5042 }
5043 nbchar = in - ctxt->input->cur;
5044 /*
5045 * save current set of data
5046 */
5047 if (nbchar > 0) {
5048 if ((ctxt->sax != NULL) &&
5049 (ctxt->sax->comment != NULL)) {
5050 if (buf == NULL) {
5051 if ((*in == '-') && (in[1] == '-'))
5052 size = nbchar + 1;
5053 else
5054 size = XML_PARSER_BUFFER_SIZE + nbchar;
5055 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5056 if (buf == NULL) {
5057 xmlErrMemory(ctxt, NULL);
5058 ctxt->instate = state;
5059 return;
5060 }
5061 len = 0;
5062 } else if (len + nbchar + 1 >= size) {
5063 xmlChar *new_buf;
5064 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5065 new_buf = (xmlChar *) xmlRealloc(buf,
5066 size * sizeof(xmlChar));
5067 if (new_buf == NULL) {
5068 xmlFree (buf);
5069 xmlErrMemory(ctxt, NULL);
5070 ctxt->instate = state;
5071 return;
5072 }
5073 buf = new_buf;
5074 }
5075 memcpy(&buf[len], ctxt->input->cur, nbchar);
5076 len += nbchar;
5077 buf[len] = 0;
5078 }
5079 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08005080 if ((len > XML_MAX_TEXT_LENGTH) &&
5081 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5082 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5083 "Comment too big found", NULL);
5084 xmlFree (buf);
5085 return;
5086 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005087 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00005088 if (*in == 0xA) {
5089 in++;
5090 ctxt->input->line++; ctxt->input->col = 1;
5091 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005092 if (*in == 0xD) {
5093 in++;
5094 if (*in == 0xA) {
5095 ctxt->input->cur = in;
5096 in++;
5097 ctxt->input->line++; ctxt->input->col = 1;
5098 continue; /* while */
5099 }
5100 in--;
5101 }
5102 SHRINK;
5103 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005104 if (ctxt->instate == XML_PARSER_EOF) {
5105 xmlFree(buf);
5106 return;
5107 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005108 in = ctxt->input->cur;
5109 if (*in == '-') {
5110 if (in[1] == '-') {
5111 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00005112 if (ctxt->input->id != inputid) {
5113 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5114 "comment doesn't start and stop in the same entity\n");
5115 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005116 SKIP(3);
5117 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5118 (!ctxt->disableSAX)) {
5119 if (buf != NULL)
5120 ctxt->sax->comment(ctxt->userData, buf);
5121 else
5122 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5123 }
5124 if (buf != NULL)
5125 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08005126 if (ctxt->instate != XML_PARSER_EOF)
5127 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005128 return;
5129 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005130 if (buf != NULL) {
5131 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5132 "Double hyphen within comment: "
5133 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005134 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005135 } else
5136 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5137 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005138 in++;
5139 ctxt->input->col++;
5140 }
5141 in++;
5142 ctxt->input->col++;
5143 goto get_more;
5144 }
5145 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5146 xmlParseCommentComplex(ctxt, buf, len, size);
5147 ctxt->instate = state;
5148 return;
5149}
5150
Owen Taylor3473f882001-02-23 17:55:21 +00005151
5152/**
5153 * xmlParsePITarget:
5154 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005155 *
Owen Taylor3473f882001-02-23 17:55:21 +00005156 * parse the name of a PI
5157 *
5158 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5159 *
5160 * Returns the PITarget name or NULL
5161 */
5162
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005163const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005164xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005165 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005166
5167 name = xmlParseName(ctxt);
5168 if ((name != NULL) &&
5169 ((name[0] == 'x') || (name[0] == 'X')) &&
5170 ((name[1] == 'm') || (name[1] == 'M')) &&
5171 ((name[2] == 'l') || (name[2] == 'L'))) {
5172 int i;
5173 if ((name[0] == 'x') && (name[1] == 'm') &&
5174 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005175 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005176 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005177 return(name);
5178 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005179 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005180 return(name);
5181 }
5182 for (i = 0;;i++) {
5183 if (xmlW3CPIs[i] == NULL) break;
5184 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5185 return(name);
5186 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005187 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5188 "xmlParsePITarget: invalid name prefix 'xml'\n",
5189 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005190 }
Daniel Veillard37334572008-07-31 08:20:02 +00005191 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005192 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005193 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005194 }
Owen Taylor3473f882001-02-23 17:55:21 +00005195 return(name);
5196}
5197
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A value that does not match the expected syntax produces an
 * XML_WAR_CATALOG_PI warning, except when the '=' sign is missing after
 * the "catalog" keyword, in which case the function returns silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* Expect: S? "catalog" S? '=' S? quoted-URL S? end-of-string */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* Scan up to the matching closing quote. */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5259
Owen Taylor3473f882001-02-23 17:55:21 +00005260/**
5261 * xmlParsePI:
5262 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005263 *
Owen Taylor3473f882001-02-23 17:55:21 +00005264 * parse an XML Processing Instruction.
5265 *
5266 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5267 *
5268 * The processing is transfered to SAX once parsed.
5269 */
5270
5271void
5272xmlParsePI(xmlParserCtxtPtr ctxt) {
5273 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005274 size_t len = 0;
5275 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005276 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005277 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005278 xmlParserInputState state;
5279 int count = 0;
5280
5281 if ((RAW == '<') && (NXT(1) == '?')) {
5282 xmlParserInputPtr input = ctxt->input;
5283 state = ctxt->instate;
5284 ctxt->instate = XML_PARSER_PI;
5285 /*
5286 * this is a Processing Instruction.
5287 */
5288 SKIP(2);
5289 SHRINK;
5290
5291 /*
5292 * Parse the target name and check for special support like
5293 * namespace.
5294 */
5295 target = xmlParsePITarget(ctxt);
5296 if (target != NULL) {
5297 if ((RAW == '?') && (NXT(1) == '>')) {
5298 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005299 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5300 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005301 }
5302 SKIP(2);
5303
5304 /*
5305 * SAX: PI detected.
5306 */
5307 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5308 (ctxt->sax->processingInstruction != NULL))
5309 ctxt->sax->processingInstruction(ctxt->userData,
5310 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005311 if (ctxt->instate != XML_PARSER_EOF)
5312 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005313 return;
5314 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005315 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005316 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005317 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005318 ctxt->instate = state;
5319 return;
5320 }
5321 cur = CUR;
5322 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005323 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5324 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005325 }
5326 SKIP_BLANKS;
5327 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005328 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005329 ((cur != '?') || (NXT(1) != '>'))) {
5330 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005331 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005332 size_t new_size = size * 2;
5333 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005334 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005335 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005336 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005337 ctxt->instate = state;
5338 return;
5339 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005340 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005341 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005342 }
5343 count++;
5344 if (count > 50) {
5345 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005346 if (ctxt->instate == XML_PARSER_EOF) {
5347 xmlFree(buf);
5348 return;
5349 }
Owen Taylor3473f882001-02-23 17:55:21 +00005350 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005351 if ((len > XML_MAX_TEXT_LENGTH) &&
5352 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5353 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5354 "PI %s too big found", target);
5355 xmlFree(buf);
5356 ctxt->instate = state;
5357 return;
5358 }
Owen Taylor3473f882001-02-23 17:55:21 +00005359 }
5360 COPY_BUF(l,buf,len,cur);
5361 NEXTL(l);
5362 cur = CUR_CHAR(l);
5363 if (cur == 0) {
5364 SHRINK;
5365 GROW;
5366 cur = CUR_CHAR(l);
5367 }
5368 }
Daniel Veillard51304812012-07-19 20:34:26 +08005369 if ((len > XML_MAX_TEXT_LENGTH) &&
5370 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5371 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5372 "PI %s too big found", target);
5373 xmlFree(buf);
5374 ctxt->instate = state;
5375 return;
5376 }
Owen Taylor3473f882001-02-23 17:55:21 +00005377 buf[len] = 0;
5378 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005379 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5380 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005381 } else {
5382 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005383 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5384 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005385 }
5386 SKIP(2);
5387
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005388#ifdef LIBXML_CATALOG_ENABLED
5389 if (((state == XML_PARSER_MISC) ||
5390 (state == XML_PARSER_START)) &&
5391 (xmlStrEqual(target, XML_CATALOG_PI))) {
5392 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5393 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5394 (allow == XML_CATA_ALLOW_ALL))
5395 xmlParseCatalogPI(ctxt, buf);
5396 }
5397#endif
5398
5399
Owen Taylor3473f882001-02-23 17:55:21 +00005400 /*
5401 * SAX: PI detected.
5402 */
5403 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5404 (ctxt->sax->processingInstruction != NULL))
5405 ctxt->sax->processingInstruction(ctxt->userData,
5406 target, buf);
5407 }
5408 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005409 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005410 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005411 }
Chris Evans77404b82011-12-14 16:18:25 +08005412 if (ctxt->instate != XML_PARSER_EOF)
5413 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005414 }
5415}
5416
5417/**
5418 * xmlParseNotationDecl:
5419 * @ctxt: an XML parser context
5420 *
5421 * parse a notation declaration
5422 *
5423 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5424 *
5425 * Hence there is actually 3 choices:
5426 * 'PUBLIC' S PubidLiteral
5427 * 'PUBLIC' S PubidLiteral S SystemLiteral
5428 * and 'SYSTEM' S SystemLiteral
5429 *
5430 * See the NOTE on xmlParseExternalID().
5431 */
5432
5433void
5434xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005435 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005436 xmlChar *Pubid;
5437 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005438
Daniel Veillarda07050d2003-10-19 14:46:32 +00005439 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005440 xmlParserInputPtr input = ctxt->input;
5441 SHRINK;
5442 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005443 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005444 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5445 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005446 return;
5447 }
5448 SKIP_BLANKS;
5449
Daniel Veillard76d66f42001-05-16 21:05:17 +00005450 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005451 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005452 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005453 return;
5454 }
William M. Brack76e95df2003-10-18 16:20:14 +00005455 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005456 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005457 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005458 return;
5459 }
Daniel Veillard37334572008-07-31 08:20:02 +00005460 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005461 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005462 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005463 name, NULL, NULL);
5464 }
Owen Taylor3473f882001-02-23 17:55:21 +00005465 SKIP_BLANKS;
5466
5467 /*
5468 * Parse the IDs.
5469 */
5470 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5471 SKIP_BLANKS;
5472
5473 if (RAW == '>') {
5474 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005475 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5476 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005477 }
5478 NEXT;
5479 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5480 (ctxt->sax->notationDecl != NULL))
5481 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5482 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005483 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005484 }
Owen Taylor3473f882001-02-23 17:55:21 +00005485 if (Systemid != NULL) xmlFree(Systemid);
5486 if (Pubid != NULL) xmlFree(Pubid);
5487 }
5488}
5489
5490/**
5491 * xmlParseEntityDecl:
5492 * @ctxt: an XML parser context
5493 *
5494 * parse <!ENTITY declarations
5495 *
5496 * [70] EntityDecl ::= GEDecl | PEDecl
5497 *
5498 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5499 *
5500 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5501 *
5502 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5503 *
5504 * [74] PEDef ::= EntityValue | ExternalID
5505 *
5506 * [76] NDataDecl ::= S 'NDATA' S Name
5507 *
5508 * [ VC: Notation Declared ]
5509 * The Name must match the declared name of a notation.
5510 */
5511
5512void
5513xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005514 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005515 xmlChar *value = NULL;
5516 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005517 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005518 int isParameter = 0;
5519 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005520 int skipped;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005521
Daniel Veillard4c778d82005-01-23 17:37:44 +00005522 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005523 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005524 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005525 SHRINK;
5526 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005527 skipped = SKIP_BLANKS;
5528 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005529 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5530 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005531 }
Owen Taylor3473f882001-02-23 17:55:21 +00005532
5533 if (RAW == '%') {
5534 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005535 skipped = SKIP_BLANKS;
5536 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005537 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
David Kilzer4472c3a2016-05-13 15:13:17 +08005538 "Space required after '%%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005539 }
Owen Taylor3473f882001-02-23 17:55:21 +00005540 isParameter = 1;
5541 }
5542
Daniel Veillard76d66f42001-05-16 21:05:17 +00005543 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005544 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005545 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5546 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005547 return;
5548 }
Daniel Veillard37334572008-07-31 08:20:02 +00005549 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005550 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005551 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005552 name, NULL, NULL);
5553 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005554 skipped = SKIP_BLANKS;
5555 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005556 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5557 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005558 }
Owen Taylor3473f882001-02-23 17:55:21 +00005559
Daniel Veillardf5582f12002-06-11 10:08:16 +00005560 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005561 /*
5562 * handle the various case of definitions...
5563 */
5564 if (isParameter) {
5565 if ((RAW == '"') || (RAW == '\'')) {
5566 value = xmlParseEntityValue(ctxt, &orig);
5567 if (value) {
5568 if ((ctxt->sax != NULL) &&
5569 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5570 ctxt->sax->entityDecl(ctxt->userData, name,
5571 XML_INTERNAL_PARAMETER_ENTITY,
5572 NULL, NULL, value);
5573 }
5574 } else {
5575 URI = xmlParseExternalID(ctxt, &literal, 1);
5576 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005577 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005578 }
5579 if (URI) {
5580 xmlURIPtr uri;
5581
5582 uri = xmlParseURI((const char *) URI);
5583 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005584 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5585 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005586 /*
5587 * This really ought to be a well formedness error
5588 * but the XML Core WG decided otherwise c.f. issue
5589 * E26 of the XML erratas.
5590 */
Owen Taylor3473f882001-02-23 17:55:21 +00005591 } else {
5592 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005593 /*
5594 * Okay this is foolish to block those but not
5595 * invalid URIs.
5596 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005597 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005598 } else {
5599 if ((ctxt->sax != NULL) &&
5600 (!ctxt->disableSAX) &&
5601 (ctxt->sax->entityDecl != NULL))
5602 ctxt->sax->entityDecl(ctxt->userData, name,
5603 XML_EXTERNAL_PARAMETER_ENTITY,
5604 literal, URI, NULL);
5605 }
5606 xmlFreeURI(uri);
5607 }
5608 }
5609 }
5610 } else {
5611 if ((RAW == '"') || (RAW == '\'')) {
5612 value = xmlParseEntityValue(ctxt, &orig);
5613 if ((ctxt->sax != NULL) &&
5614 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5615 ctxt->sax->entityDecl(ctxt->userData, name,
5616 XML_INTERNAL_GENERAL_ENTITY,
5617 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005618 /*
5619 * For expat compatibility in SAX mode.
5620 */
5621 if ((ctxt->myDoc == NULL) ||
5622 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5623 if (ctxt->myDoc == NULL) {
5624 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005625 if (ctxt->myDoc == NULL) {
5626 xmlErrMemory(ctxt, "New Doc failed");
5627 return;
5628 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005629 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005630 }
5631 if (ctxt->myDoc->intSubset == NULL)
5632 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5633 BAD_CAST "fake", NULL, NULL);
5634
Daniel Veillard1af9a412003-08-20 22:54:39 +00005635 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5636 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005637 }
Owen Taylor3473f882001-02-23 17:55:21 +00005638 } else {
5639 URI = xmlParseExternalID(ctxt, &literal, 1);
5640 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005641 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005642 }
5643 if (URI) {
5644 xmlURIPtr uri;
5645
5646 uri = xmlParseURI((const char *)URI);
5647 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005648 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5649 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005650 /*
5651 * This really ought to be a well formedness error
5652 * but the XML Core WG decided otherwise c.f. issue
5653 * E26 of the XML erratas.
5654 */
Owen Taylor3473f882001-02-23 17:55:21 +00005655 } else {
5656 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005657 /*
5658 * Okay this is foolish to block those but not
5659 * invalid URIs.
5660 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005661 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005662 }
5663 xmlFreeURI(uri);
5664 }
5665 }
William M. Brack76e95df2003-10-18 16:20:14 +00005666 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005667 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5668 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005669 }
5670 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005671 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005672 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005673 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005674 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5675 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005676 }
5677 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005678 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005679 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5680 (ctxt->sax->unparsedEntityDecl != NULL))
5681 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5682 literal, URI, ndata);
5683 } else {
5684 if ((ctxt->sax != NULL) &&
5685 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5686 ctxt->sax->entityDecl(ctxt->userData, name,
5687 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5688 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005689 /*
5690 * For expat compatibility in SAX mode.
5691 * assuming the entity repalcement was asked for
5692 */
5693 if ((ctxt->replaceEntities != 0) &&
5694 ((ctxt->myDoc == NULL) ||
5695 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5696 if (ctxt->myDoc == NULL) {
5697 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005698 if (ctxt->myDoc == NULL) {
5699 xmlErrMemory(ctxt, "New Doc failed");
5700 return;
5701 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005702 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005703 }
5704
5705 if (ctxt->myDoc->intSubset == NULL)
5706 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5707 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005708 xmlSAX2EntityDecl(ctxt, name,
5709 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5710 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005711 }
Owen Taylor3473f882001-02-23 17:55:21 +00005712 }
5713 }
5714 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005715 if (ctxt->instate == XML_PARSER_EOF)
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005716 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00005717 SKIP_BLANKS;
5718 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005719 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005720 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005721 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005722 } else {
5723 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005724 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5725 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005726 }
5727 NEXT;
5728 }
5729 if (orig != NULL) {
5730 /*
5731 * Ugly mechanism to save the raw entity value.
5732 */
5733 xmlEntityPtr cur = NULL;
5734
5735 if (isParameter) {
5736 if ((ctxt->sax != NULL) &&
5737 (ctxt->sax->getParameterEntity != NULL))
5738 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5739 } else {
5740 if ((ctxt->sax != NULL) &&
5741 (ctxt->sax->getEntity != NULL))
5742 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005743 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005744 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005745 }
Owen Taylor3473f882001-02-23 17:55:21 +00005746 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005747 if ((cur != NULL) && (cur->orig == NULL)) {
5748 cur->orig = orig;
5749 orig = NULL;
5750 }
Owen Taylor3473f882001-02-23 17:55:21 +00005751 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005752
5753done:
Owen Taylor3473f882001-02-23 17:55:21 +00005754 if (value != NULL) xmlFree(value);
5755 if (URI != NULL) xmlFree(URI);
5756 if (literal != NULL) xmlFree(literal);
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005757 if (orig != NULL) xmlFree(orig);
Owen Taylor3473f882001-02-23 17:55:21 +00005758 }
5759}
5760
5761/**
5762 * xmlParseDefaultDecl:
5763 * @ctxt: an XML parser context
5764 * @value: Receive a possible fixed default value for the attribute
5765 *
5766 * Parse an attribute default declaration
5767 *
5768 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5769 *
5770 * [ VC: Required Attribute ]
5771 * if the default declaration is the keyword #REQUIRED, then the
5772 * attribute must be specified for all elements of the type in the
5773 * attribute-list declaration.
5774 *
5775 * [ VC: Attribute Default Legal ]
5776 * The declared default value must meet the lexical constraints of
5777 * the declared attribute type c.f. xmlValidateAttributeDecl()
5778 *
5779 * [ VC: Fixed Attribute Default ]
5780 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005781 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005782 *
5783 * [ WFC: No < in Attribute Values ]
5784 * handled in xmlParseAttValue()
5785 *
5786 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005787 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005788 */
5789
5790int
5791xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5792 int val;
5793 xmlChar *ret;
5794
5795 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005796 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005797 SKIP(9);
5798 return(XML_ATTRIBUTE_REQUIRED);
5799 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005800 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005801 SKIP(8);
5802 return(XML_ATTRIBUTE_IMPLIED);
5803 }
5804 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005805 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005806 SKIP(6);
5807 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005808 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005809 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5810 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005811 }
5812 SKIP_BLANKS;
5813 }
5814 ret = xmlParseAttValue(ctxt);
5815 ctxt->instate = XML_PARSER_DTD;
5816 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005817 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005818 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005819 } else
5820 *value = ret;
5821 return(val);
5822}
5823
5824/**
5825 * xmlParseNotationType:
5826 * @ctxt: an XML parser context
5827 *
5828 * parse an Notation attribute type.
5829 *
5830 * Note: the leading 'NOTATION' S part has already being parsed...
5831 *
5832 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5833 *
5834 * [ VC: Notation Attributes ]
5835 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005836 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005837 *
5838 * Returns: the notation attribute tree built while parsing
5839 */
5840
5841xmlEnumerationPtr
5842xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005843 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005844 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005845
5846 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005847 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005848 return(NULL);
5849 }
5850 SHRINK;
5851 do {
5852 NEXT;
5853 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005854 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005855 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005856 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5857 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005858 xmlFreeEnumeration(ret);
5859 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005860 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005861 tmp = ret;
5862 while (tmp != NULL) {
5863 if (xmlStrEqual(name, tmp->name)) {
5864 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5865 "standalone: attribute notation value token %s duplicated\n",
5866 name, NULL);
5867 if (!xmlDictOwns(ctxt->dict, name))
5868 xmlFree((xmlChar *) name);
5869 break;
5870 }
5871 tmp = tmp->next;
5872 }
5873 if (tmp == NULL) {
5874 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005875 if (cur == NULL) {
5876 xmlFreeEnumeration(ret);
5877 return(NULL);
5878 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005879 if (last == NULL) ret = last = cur;
5880 else {
5881 last->next = cur;
5882 last = cur;
5883 }
Owen Taylor3473f882001-02-23 17:55:21 +00005884 }
5885 SKIP_BLANKS;
5886 } while (RAW == '|');
5887 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005888 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005889 xmlFreeEnumeration(ret);
5890 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005891 }
5892 NEXT;
5893 return(ret);
5894}
5895
5896/**
5897 * xmlParseEnumerationType:
5898 * @ctxt: an XML parser context
5899 *
5900 * parse an Enumeration attribute type.
5901 *
5902 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5903 *
5904 * [ VC: Enumeration ]
5905 * Values of this type must match one of the Nmtoken tokens in
5906 * the declaration
5907 *
5908 * Returns: the enumeration attribute tree built while parsing
5909 */
5910
5911xmlEnumerationPtr
5912xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5913 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005914 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005915
5916 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005917 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005918 return(NULL);
5919 }
5920 SHRINK;
5921 do {
5922 NEXT;
5923 SKIP_BLANKS;
5924 name = xmlParseNmtoken(ctxt);
5925 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005926 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005927 return(ret);
5928 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005929 tmp = ret;
5930 while (tmp != NULL) {
5931 if (xmlStrEqual(name, tmp->name)) {
5932 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5933 "standalone: attribute enumeration value token %s duplicated\n",
5934 name, NULL);
5935 if (!xmlDictOwns(ctxt->dict, name))
5936 xmlFree(name);
5937 break;
5938 }
5939 tmp = tmp->next;
5940 }
5941 if (tmp == NULL) {
5942 cur = xmlCreateEnumeration(name);
5943 if (!xmlDictOwns(ctxt->dict, name))
5944 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005945 if (cur == NULL) {
5946 xmlFreeEnumeration(ret);
5947 return(NULL);
5948 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005949 if (last == NULL) ret = last = cur;
5950 else {
5951 last->next = cur;
5952 last = cur;
5953 }
Owen Taylor3473f882001-02-23 17:55:21 +00005954 }
5955 SKIP_BLANKS;
5956 } while (RAW == '|');
5957 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005958 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005959 return(ret);
5960 }
5961 NEXT;
5962 return(ret);
5963}
5964
5965/**
5966 * xmlParseEnumeratedType:
5967 * @ctxt: an XML parser context
5968 * @tree: the enumeration tree built while parsing
5969 *
5970 * parse an Enumerated attribute type.
5971 *
5972 * [57] EnumeratedType ::= NotationType | Enumeration
5973 *
5974 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5975 *
5976 *
5977 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5978 */
5979
5980int
5981xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005982 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005983 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005984 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005985 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5986 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005987 return(0);
5988 }
5989 SKIP_BLANKS;
5990 *tree = xmlParseNotationType(ctxt);
5991 if (*tree == NULL) return(0);
5992 return(XML_ATTRIBUTE_NOTATION);
5993 }
5994 *tree = xmlParseEnumerationType(ctxt);
5995 if (*tree == NULL) return(0);
5996 return(XML_ATTRIBUTE_ENUMERATION);
5997}
5998
5999/**
6000 * xmlParseAttributeType:
6001 * @ctxt: an XML parser context
6002 * @tree: the enumeration tree built while parsing
6003 *
6004 * parse the Attribute list def for an element
6005 *
6006 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6007 *
6008 * [55] StringType ::= 'CDATA'
6009 *
6010 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6011 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6012 *
6013 * Validity constraints for attribute values syntax are checked in
6014 * xmlValidateAttributeValue()
6015 *
6016 * [ VC: ID ]
6017 * Values of type ID must match the Name production. A name must not
6018 * appear more than once in an XML document as a value of this type;
6019 * i.e., ID values must uniquely identify the elements which bear them.
6020 *
6021 * [ VC: One ID per Element Type ]
6022 * No element type may have more than one ID attribute specified.
6023 *
6024 * [ VC: ID Attribute Default ]
6025 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6026 *
6027 * [ VC: IDREF ]
6028 * Values of type IDREF must match the Name production, and values
6029 * of type IDREFS must match Names; each IDREF Name must match the value
6030 * of an ID attribute on some element in the XML document; i.e. IDREF
6031 * values must match the value of some ID attribute.
6032 *
6033 * [ VC: Entity Name ]
6034 * Values of type ENTITY must match the Name production, values
6035 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006036 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00006037 *
6038 * [ VC: Name Token ]
6039 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006040 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00006041 *
6042 * Returns the attribute type
6043 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006044int
Owen Taylor3473f882001-02-23 17:55:21 +00006045xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6046 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006047 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006048 SKIP(5);
6049 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006050 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006051 SKIP(6);
6052 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006053 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006054 SKIP(5);
6055 return(XML_ATTRIBUTE_IDREF);
6056 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6057 SKIP(2);
6058 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006059 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006060 SKIP(6);
6061 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006062 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006063 SKIP(8);
6064 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006065 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006066 SKIP(8);
6067 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006068 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006069 SKIP(7);
6070 return(XML_ATTRIBUTE_NMTOKEN);
6071 }
6072 return(xmlParseEnumeratedType(ctxt, tree));
6073}
6074
6075/**
6076 * xmlParseAttributeListDecl:
6077 * @ctxt: an XML parser context
6078 *
6079 * : parse the Attribute list def for an element
6080 *
6081 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6082 *
6083 * [53] AttDef ::= S Name S AttType S DefaultDecl
6084 *
6085 */
6086void
6087xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006088 const xmlChar *elemName;
6089 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00006090 xmlEnumerationPtr tree;
6091
Daniel Veillarda07050d2003-10-19 14:46:32 +00006092 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006093 xmlParserInputPtr input = ctxt->input;
6094
6095 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006096 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006097 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006098 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006099 }
6100 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006101 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006102 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006103 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6104 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006105 return;
6106 }
6107 SKIP_BLANKS;
6108 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006109 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006110 int type;
6111 int def;
6112 xmlChar *defaultValue = NULL;
6113
6114 GROW;
6115 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006116 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006117 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006118 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6119 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006120 break;
6121 }
6122 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006123 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006124 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006125 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006126 break;
6127 }
6128 SKIP_BLANKS;
6129
6130 type = xmlParseAttributeType(ctxt, &tree);
6131 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006132 break;
6133 }
6134
6135 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006136 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006137 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6138 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006139 if (tree != NULL)
6140 xmlFreeEnumeration(tree);
6141 break;
6142 }
6143 SKIP_BLANKS;
6144
6145 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6146 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006147 if (defaultValue != NULL)
6148 xmlFree(defaultValue);
6149 if (tree != NULL)
6150 xmlFreeEnumeration(tree);
6151 break;
6152 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006153 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6154 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006155
6156 GROW;
6157 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006158 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006159 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006160 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006161 if (defaultValue != NULL)
6162 xmlFree(defaultValue);
6163 if (tree != NULL)
6164 xmlFreeEnumeration(tree);
6165 break;
6166 }
6167 SKIP_BLANKS;
6168 }
Owen Taylor3473f882001-02-23 17:55:21 +00006169 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6170 (ctxt->sax->attributeDecl != NULL))
6171 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6172 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006173 else if (tree != NULL)
6174 xmlFreeEnumeration(tree);
6175
6176 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006177 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006178 (def != XML_ATTRIBUTE_REQUIRED)) {
6179 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6180 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006181 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006182 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6183 }
Owen Taylor3473f882001-02-23 17:55:21 +00006184 if (defaultValue != NULL)
6185 xmlFree(defaultValue);
6186 GROW;
6187 }
6188 if (RAW == '>') {
6189 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006190 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6191 "Attribute list declaration doesn't start and stop in the same entity\n",
6192 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006193 }
6194 NEXT;
6195 }
Owen Taylor3473f882001-02-23 17:55:21 +00006196 }
6197}
6198
6199/**
6200 * xmlParseElementMixedContentDecl:
6201 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006202 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006203 *
6204 * parse the declaration for a Mixed Element content
6205 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006206 *
Owen Taylor3473f882001-02-23 17:55:21 +00006207 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6208 * '(' S? '#PCDATA' S? ')'
6209 *
6210 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6211 *
6212 * [ VC: No Duplicate Types ]
6213 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006214 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006215 *
6216 * returns: the list of the xmlElementContentPtr describing the element choices
6217 */
6218xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006219xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006220 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006221 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006222
6223 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006224 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006225 SKIP(7);
6226 SKIP_BLANKS;
6227 SHRINK;
6228 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006229 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006230 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6231"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006232 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006233 }
Owen Taylor3473f882001-02-23 17:55:21 +00006234 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006235 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006236 if (ret == NULL)
6237 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006238 if (RAW == '*') {
6239 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6240 NEXT;
6241 }
6242 return(ret);
6243 }
6244 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006245 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006246 if (ret == NULL) return(NULL);
6247 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006248 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006249 NEXT;
6250 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006251 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006252 if (ret == NULL) return(NULL);
6253 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006254 if (cur != NULL)
6255 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006256 cur = ret;
6257 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006258 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006259 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006260 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006261 if (n->c1 != NULL)
6262 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006263 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006264 if (n != NULL)
6265 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006266 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006267 }
6268 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006269 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006270 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006271 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006272 "xmlParseElementMixedContentDecl : Name expected\n");
Nick Wellnhofer8627e4e2017-05-23 18:11:08 +02006273 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006274 return(NULL);
6275 }
6276 SKIP_BLANKS;
6277 GROW;
6278 }
6279 if ((RAW == ')') && (NXT(1) == '*')) {
6280 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006281 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006282 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006283 if (cur->c2 != NULL)
6284 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006285 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006286 if (ret != NULL)
6287 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006288 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006289 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6290"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006291 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006292 }
Owen Taylor3473f882001-02-23 17:55:21 +00006293 SKIP(2);
6294 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006295 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006296 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006297 return(NULL);
6298 }
6299
6300 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006301 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006302 }
6303 return(ret);
6304}
6305
6306/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006307 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006308 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006309 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006310 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006311 *
6312 * parse the declaration for a Mixed Element content
6313 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006314 *
Owen Taylor3473f882001-02-23 17:55:21 +00006315 *
6316 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6317 *
6318 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6319 *
6320 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6321 *
6322 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6323 *
6324 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6325 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006326 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006327 * opening or closing parentheses in a choice, seq, or Mixed
6328 * construct is contained in the replacement text for a parameter
6329 * entity, both must be contained in the same replacement text. For
6330 * interoperability, if a parameter-entity reference appears in a
6331 * choice, seq, or Mixed construct, its replacement text should not
6332 * be empty, and neither the first nor last non-blank character of
6333 * the replacement text should be a connector (| or ,).
6334 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006335 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006336 * hierarchy.
6337 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006338static xmlElementContentPtr
6339xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6340 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006341 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006342 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006343 xmlChar type = 0;
6344
Daniel Veillard489f9672009-08-10 16:49:30 +02006345 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6346 (depth > 2048)) {
6347 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6348"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6349 depth);
6350 return(NULL);
6351 }
Owen Taylor3473f882001-02-23 17:55:21 +00006352 SKIP_BLANKS;
6353 GROW;
6354 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006355 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006356
Owen Taylor3473f882001-02-23 17:55:21 +00006357 /* Recurse on first child */
6358 NEXT;
6359 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006360 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6361 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006362 SKIP_BLANKS;
6363 GROW;
6364 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006365 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006366 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006367 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006368 return(NULL);
6369 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006370 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006371 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006372 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006373 return(NULL);
6374 }
Owen Taylor3473f882001-02-23 17:55:21 +00006375 GROW;
6376 if (RAW == '?') {
6377 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6378 NEXT;
6379 } else if (RAW == '*') {
6380 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6381 NEXT;
6382 } else if (RAW == '+') {
6383 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6384 NEXT;
6385 } else {
6386 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6387 }
Owen Taylor3473f882001-02-23 17:55:21 +00006388 GROW;
6389 }
6390 SKIP_BLANKS;
6391 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006392 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006393 /*
6394 * Each loop we parse one separator and one element.
6395 */
6396 if (RAW == ',') {
6397 if (type == 0) type = CUR;
6398
6399 /*
6400 * Detect "Name | Name , Name" error
6401 */
6402 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006403 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006404 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006405 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006406 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006407 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006408 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006409 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006410 return(NULL);
6411 }
6412 NEXT;
6413
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006414 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006415 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006416 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006417 xmlFreeDocElementContent(ctxt->myDoc, last);
6418 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006419 return(NULL);
6420 }
6421 if (last == NULL) {
6422 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006423 if (ret != NULL)
6424 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006425 ret = cur = op;
6426 } else {
6427 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006428 if (op != NULL)
6429 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006430 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006431 if (last != NULL)
6432 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006433 cur =op;
6434 last = NULL;
6435 }
6436 } else if (RAW == '|') {
6437 if (type == 0) type = CUR;
6438
6439 /*
6440 * Detect "Name , Name | Name" error
6441 */
6442 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006443 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006444 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006445 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006446 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006447 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006448 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006449 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006450 return(NULL);
6451 }
6452 NEXT;
6453
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006454 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006455 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006456 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006457 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006458 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006459 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006460 return(NULL);
6461 }
6462 if (last == NULL) {
6463 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006464 if (ret != NULL)
6465 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006466 ret = cur = op;
6467 } else {
6468 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006469 if (op != NULL)
6470 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006471 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006472 if (last != NULL)
6473 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006474 cur =op;
6475 last = NULL;
6476 }
6477 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006478 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006479 if ((last != NULL) && (last != ret))
6480 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006481 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006482 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006483 return(NULL);
6484 }
6485 GROW;
6486 SKIP_BLANKS;
6487 GROW;
6488 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006489 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006490 /* Recurse on second child */
6491 NEXT;
6492 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006493 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6494 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006495 SKIP_BLANKS;
6496 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006497 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006498 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006499 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006500 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006501 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006502 return(NULL);
6503 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006504 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006505 if (last == NULL) {
6506 if (ret != NULL)
6507 xmlFreeDocElementContent(ctxt->myDoc, ret);
6508 return(NULL);
6509 }
Owen Taylor3473f882001-02-23 17:55:21 +00006510 if (RAW == '?') {
6511 last->ocur = XML_ELEMENT_CONTENT_OPT;
6512 NEXT;
6513 } else if (RAW == '*') {
6514 last->ocur = XML_ELEMENT_CONTENT_MULT;
6515 NEXT;
6516 } else if (RAW == '+') {
6517 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6518 NEXT;
6519 } else {
6520 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6521 }
6522 }
6523 SKIP_BLANKS;
6524 GROW;
6525 }
6526 if ((cur != NULL) && (last != NULL)) {
6527 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006528 if (last != NULL)
6529 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006530 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006531 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006532 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6533"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006534 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006535 }
Owen Taylor3473f882001-02-23 17:55:21 +00006536 NEXT;
6537 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006538 if (ret != NULL) {
6539 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6540 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6541 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6542 else
6543 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6544 }
Owen Taylor3473f882001-02-23 17:55:21 +00006545 NEXT;
6546 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006547 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006548 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006549 cur = ret;
6550 /*
6551 * Some normalization:
6552 * (a | b* | c?)* == (a | b | c)*
6553 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006554 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006555 if ((cur->c1 != NULL) &&
6556 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6557 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6558 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6559 if ((cur->c2 != NULL) &&
6560 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6561 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6562 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6563 cur = cur->c2;
6564 }
6565 }
Owen Taylor3473f882001-02-23 17:55:21 +00006566 NEXT;
6567 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006568 if (ret != NULL) {
6569 int found = 0;
6570
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006571 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6572 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6573 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006574 else
6575 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006576 /*
6577 * Some normalization:
6578 * (a | b*)+ == (a | b)*
6579 * (a | b?)+ == (a | b)*
6580 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006581 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006582 if ((cur->c1 != NULL) &&
6583 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6584 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6585 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6586 found = 1;
6587 }
6588 if ((cur->c2 != NULL) &&
6589 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6590 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6591 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6592 found = 1;
6593 }
6594 cur = cur->c2;
6595 }
6596 if (found)
6597 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6598 }
Owen Taylor3473f882001-02-23 17:55:21 +00006599 NEXT;
6600 }
6601 return(ret);
6602}
6603
6604/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006605 * xmlParseElementChildrenContentDecl:
6606 * @ctxt: an XML parser context
6607 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006608 *
6609 * parse the declaration for a Mixed Element content
6610 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6611 *
6612 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6613 *
6614 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6615 *
6616 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6617 *
6618 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6619 *
6620 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6621 * TODO Parameter-entity replacement text must be properly nested
6622 * with parenthesized groups. That is to say, if either of the
6623 * opening or closing parentheses in a choice, seq, or Mixed
6624 * construct is contained in the replacement text for a parameter
6625 * entity, both must be contained in the same replacement text. For
6626 * interoperability, if a parameter-entity reference appears in a
6627 * choice, seq, or Mixed construct, its replacement text should not
6628 * be empty, and neither the first nor last non-blank character of
6629 * the replacement text should be a connector (| or ,).
6630 *
6631 * Returns the tree of xmlElementContentPtr describing the element
6632 * hierarchy.
6633 */
6634xmlElementContentPtr
6635xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6636 /* stub left for API/ABI compat */
6637 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6638}
6639
6640/**
Owen Taylor3473f882001-02-23 17:55:21 +00006641 * xmlParseElementContentDecl:
6642 * @ctxt: an XML parser context
6643 * @name: the name of the element being defined.
6644 * @result: the Element Content pointer will be stored here if any
6645 *
6646 * parse the declaration for an Element content either Mixed or Children,
6647 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006648 *
Owen Taylor3473f882001-02-23 17:55:21 +00006649 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6650 *
6651 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6652 */
6653
6654int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006655xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006656 xmlElementContentPtr *result) {
6657
6658 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006659 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006660 int res;
6661
6662 *result = NULL;
6663
6664 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006665 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006666 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006667 return(-1);
6668 }
6669 NEXT;
6670 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006671 if (ctxt->instate == XML_PARSER_EOF)
6672 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006673 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006674 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006675 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006676 res = XML_ELEMENT_TYPE_MIXED;
6677 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006678 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006679 res = XML_ELEMENT_TYPE_ELEMENT;
6680 }
Owen Taylor3473f882001-02-23 17:55:21 +00006681 SKIP_BLANKS;
6682 *result = tree;
6683 return(res);
6684}
6685
6686/**
6687 * xmlParseElementDecl:
6688 * @ctxt: an XML parser context
6689 *
6690 * parse an Element declaration.
6691 *
6692 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6693 *
6694 * [ VC: Unique Element Type Declaration ]
6695 * No element type may be declared more than once
6696 *
6697 * Returns the type of the element, or -1 in case of error
6698 */
6699int
6700xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006701 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006702 int ret = -1;
6703 xmlElementContentPtr content = NULL;
6704
Daniel Veillard4c778d82005-01-23 17:37:44 +00006705 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006706 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006707 xmlParserInputPtr input = ctxt->input;
6708
6709 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006710 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006711 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6712 "Space required after 'ELEMENT'\n");
David Kilzer00906752016-01-26 16:57:03 -08006713 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006714 }
6715 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006716 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006717 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006718 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6719 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006720 return(-1);
6721 }
6722 while ((RAW == 0) && (ctxt->inputNr > 1))
6723 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006724 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006725 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6726 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006727 }
6728 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006729 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006730 SKIP(5);
6731 /*
6732 * Element must always be empty.
6733 */
6734 ret = XML_ELEMENT_TYPE_EMPTY;
6735 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6736 (NXT(2) == 'Y')) {
6737 SKIP(3);
6738 /*
6739 * Element is a generic container.
6740 */
6741 ret = XML_ELEMENT_TYPE_ANY;
6742 } else if (RAW == '(') {
6743 ret = xmlParseElementContentDecl(ctxt, name, &content);
6744 } else {
6745 /*
6746 * [ WFC: PEs in Internal Subset ] error handling.
6747 */
6748 if ((RAW == '%') && (ctxt->external == 0) &&
6749 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006750 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006751 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006752 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006753 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006754 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6755 }
Owen Taylor3473f882001-02-23 17:55:21 +00006756 return(-1);
6757 }
6758
6759 SKIP_BLANKS;
6760 /*
6761 * Pop-up of finished entities.
6762 */
6763 while ((RAW == 0) && (ctxt->inputNr > 1))
6764 xmlPopInput(ctxt);
6765 SKIP_BLANKS;
6766
6767 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006768 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006769 if (content != NULL) {
6770 xmlFreeDocElementContent(ctxt->myDoc, content);
6771 }
Owen Taylor3473f882001-02-23 17:55:21 +00006772 } else {
6773 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006774 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6775 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006776 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006777
Owen Taylor3473f882001-02-23 17:55:21 +00006778 NEXT;
6779 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006780 (ctxt->sax->elementDecl != NULL)) {
6781 if (content != NULL)
6782 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006783 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6784 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006785 if ((content != NULL) && (content->parent == NULL)) {
6786 /*
6787 * this is a trick: if xmlAddElementDecl is called,
6788 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006789 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006790 * interfaces or change the API/ABI
6791 */
6792 xmlFreeDocElementContent(ctxt->myDoc, content);
6793 }
6794 } else if (content != NULL) {
6795 xmlFreeDocElementContent(ctxt->myDoc, content);
6796 }
Owen Taylor3473f882001-02-23 17:55:21 +00006797 }
Owen Taylor3473f882001-02-23 17:55:21 +00006798 }
6799 return(ret);
6800}
6801
6802/**
Owen Taylor3473f882001-02-23 17:55:21 +00006803 * xmlParseConditionalSections
6804 * @ctxt: an XML parser context
6805 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006806 * [61] conditionalSect ::= includeSect | ignoreSect
6807 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006808 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6809 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6810 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6811 */
6812
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006813static void
Owen Taylor3473f882001-02-23 17:55:21 +00006814xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006815 int id = ctxt->input->id;
6816
Owen Taylor3473f882001-02-23 17:55:21 +00006817 SKIP(3);
6818 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006819 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006820 SKIP(7);
6821 SKIP_BLANKS;
6822 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006823 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006824 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006825 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006826 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006827 if (ctxt->input->id != id) {
6828 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6829 "All markup of the conditional section is not in the same entity\n",
6830 NULL, NULL);
6831 }
Owen Taylor3473f882001-02-23 17:55:21 +00006832 NEXT;
6833 }
6834 if (xmlParserDebugEntities) {
6835 if ((ctxt->input != NULL) && (ctxt->input->filename))
6836 xmlGenericError(xmlGenericErrorContext,
6837 "%s(%d): ", ctxt->input->filename,
6838 ctxt->input->line);
6839 xmlGenericError(xmlGenericErrorContext,
6840 "Entering INCLUDE Conditional Section\n");
6841 }
6842
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006843 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6844 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006845 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006846 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006847
6848 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6849 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006850 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006851 NEXT;
6852 } else if (RAW == '%') {
6853 xmlParsePEReference(ctxt);
6854 } else
6855 xmlParseMarkupDecl(ctxt);
6856
6857 /*
6858 * Pop-up of finished entities.
6859 */
6860 while ((RAW == 0) && (ctxt->inputNr > 1))
6861 xmlPopInput(ctxt);
6862
Daniel Veillardfdc91562002-07-01 21:52:03 +00006863 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006864 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
David Kilzer00906752016-01-26 16:57:03 -08006865 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006866 break;
6867 }
6868 }
6869 if (xmlParserDebugEntities) {
6870 if ((ctxt->input != NULL) && (ctxt->input->filename))
6871 xmlGenericError(xmlGenericErrorContext,
6872 "%s(%d): ", ctxt->input->filename,
6873 ctxt->input->line);
6874 xmlGenericError(xmlGenericErrorContext,
6875 "Leaving INCLUDE Conditional Section\n");
6876 }
6877
Daniel Veillarda07050d2003-10-19 14:46:32 +00006878 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006879 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006880 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006881 int depth = 0;
6882
6883 SKIP(6);
6884 SKIP_BLANKS;
6885 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006886 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006887 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006888 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006889 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006890 if (ctxt->input->id != id) {
6891 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6892 "All markup of the conditional section is not in the same entity\n",
6893 NULL, NULL);
6894 }
Owen Taylor3473f882001-02-23 17:55:21 +00006895 NEXT;
6896 }
6897 if (xmlParserDebugEntities) {
6898 if ((ctxt->input != NULL) && (ctxt->input->filename))
6899 xmlGenericError(xmlGenericErrorContext,
6900 "%s(%d): ", ctxt->input->filename,
6901 ctxt->input->line);
6902 xmlGenericError(xmlGenericErrorContext,
6903 "Entering IGNORE Conditional Section\n");
6904 }
6905
6906 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006907 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006908 * But disable SAX event generating DTD building in the meantime
6909 */
6910 state = ctxt->disableSAX;
6911 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006912 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006913 ctxt->instate = XML_PARSER_IGNORE;
6914
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006915 while (((depth >= 0) && (RAW != 0)) &&
6916 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006917 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6918 depth++;
6919 SKIP(3);
6920 continue;
6921 }
6922 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6923 if (--depth >= 0) SKIP(3);
6924 continue;
6925 }
6926 NEXT;
6927 continue;
6928 }
6929
6930 ctxt->disableSAX = state;
6931 ctxt->instate = instate;
6932
6933 if (xmlParserDebugEntities) {
6934 if ((ctxt->input != NULL) && (ctxt->input->filename))
6935 xmlGenericError(xmlGenericErrorContext,
6936 "%s(%d): ", ctxt->input->filename,
6937 ctxt->input->line);
6938 xmlGenericError(xmlGenericErrorContext,
6939 "Leaving IGNORE Conditional Section\n");
6940 }
6941
6942 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006943 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006944 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006945 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006946 }
6947
6948 if (RAW == 0)
6949 SHRINK;
6950
6951 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006952 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006953 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006954 if (ctxt->input->id != id) {
6955 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6956 "All markup of the conditional section is not in the same entity\n",
6957 NULL, NULL);
6958 }
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006959 if ((ctxt-> instate != XML_PARSER_EOF) &&
Daniel Veillard41ac9042015-10-27 10:53:44 +08006960 ((ctxt->input->cur + 3) <= ctxt->input->end))
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006961 SKIP(3);
Owen Taylor3473f882001-02-23 17:55:21 +00006962 }
6963}
6964
6965/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006966 * xmlParseMarkupDecl:
6967 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006968 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006969 * parse Markup declarations
6970 *
6971 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6972 * NotationDecl | PI | Comment
6973 *
6974 * [ VC: Proper Declaration/PE Nesting ]
6975 * Parameter-entity replacement text must be properly nested with
6976 * markup declarations. That is to say, if either the first character
6977 * or the last character of a markup declaration (markupdecl above) is
6978 * contained in the replacement text for a parameter-entity reference,
6979 * both must be contained in the same replacement text.
6980 *
6981 * [ WFC: PEs in Internal Subset ]
6982 * In the internal DTD subset, parameter-entity references can occur
6983 * only where markup declarations can occur, not within markup declarations.
6984 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006985 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006986 */
6987void
6988xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6989 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006990 if (CUR == '<') {
6991 if (NXT(1) == '!') {
6992 switch (NXT(2)) {
6993 case 'E':
6994 if (NXT(3) == 'L')
6995 xmlParseElementDecl(ctxt);
6996 else if (NXT(3) == 'N')
6997 xmlParseEntityDecl(ctxt);
6998 break;
6999 case 'A':
7000 xmlParseAttributeListDecl(ctxt);
7001 break;
7002 case 'N':
7003 xmlParseNotationDecl(ctxt);
7004 break;
7005 case '-':
7006 xmlParseComment(ctxt);
7007 break;
7008 default:
7009 /* there is an error but it will be detected later */
7010 break;
7011 }
7012 } else if (NXT(1) == '?') {
7013 xmlParsePI(ctxt);
7014 }
7015 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08007016
7017 /*
7018 * detect requirement to exit there and act accordingly
7019 * and avoid having instate overriden later on
7020 */
7021 if (ctxt->instate == XML_PARSER_EOF)
7022 return;
7023
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007024 /*
7025 * This is only for internal subset. On external entities,
7026 * the replacement is done before parsing stage
7027 */
7028 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
7029 xmlParsePEReference(ctxt);
7030
7031 /*
7032 * Conditional sections are allowed from entities included
7033 * by PE References in the internal subset.
7034 */
7035 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
7036 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7037 xmlParseConditionalSections(ctxt);
7038 }
7039 }
7040
7041 ctxt->instate = XML_PARSER_DTD;
7042}
7043
7044/**
7045 * xmlParseTextDecl:
7046 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00007047 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007048 * parse an XML declaration header for external entities
7049 *
7050 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007051 */
7052
7053void
7054xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7055 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007056 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007057
7058 /*
7059 * We know that '<?xml' is here.
7060 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007061 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007062 SKIP(5);
7063 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007064 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007065 return;
7066 }
7067
William M. Brack76e95df2003-10-18 16:20:14 +00007068 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007069 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7070 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007071 }
7072 SKIP_BLANKS;
7073
7074 /*
7075 * We may have the VersionInfo here.
7076 */
7077 version = xmlParseVersionInfo(ctxt);
7078 if (version == NULL)
7079 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00007080 else {
William M. Brack76e95df2003-10-18 16:20:14 +00007081 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007082 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7083 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00007084 }
7085 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007086 ctxt->input->version = version;
7087
7088 /*
7089 * We must have the encoding declaration
7090 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007091 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007092 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7093 /*
7094 * The XML REC instructs us to stop parsing right here
7095 */
7096 return;
7097 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007098 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7099 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7100 "Missing encoding in text declaration\n");
7101 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007102
7103 SKIP_BLANKS;
7104 if ((RAW == '?') && (NXT(1) == '>')) {
7105 SKIP(2);
7106 } else if (RAW == '>') {
7107 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007108 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007109 NEXT;
7110 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007111 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007112 MOVETO_ENDTAG(CUR_PTR);
7113 NEXT;
7114 }
7115}
7116
7117/**
Owen Taylor3473f882001-02-23 17:55:21 +00007118 * xmlParseExternalSubset:
7119 * @ctxt: an XML parser context
7120 * @ExternalID: the external identifier
7121 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007122 *
Owen Taylor3473f882001-02-23 17:55:21 +00007123 * parse Markup declarations from an external subset
7124 *
7125 * [30] extSubset ::= textDecl? extSubsetDecl
7126 *
7127 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7128 */
7129void
7130xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7131 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00007132 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007133 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007134
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007135 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007136 (ctxt->input->end - ctxt->input->cur >= 4)) {
7137 xmlChar start[4];
7138 xmlCharEncoding enc;
7139
7140 start[0] = RAW;
7141 start[1] = NXT(1);
7142 start[2] = NXT(2);
7143 start[3] = NXT(3);
7144 enc = xmlDetectCharEncoding(start, 4);
7145 if (enc != XML_CHAR_ENCODING_NONE)
7146 xmlSwitchEncoding(ctxt, enc);
7147 }
7148
Daniel Veillarda07050d2003-10-19 14:46:32 +00007149 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007150 xmlParseTextDecl(ctxt);
7151 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7152 /*
7153 * The XML REC instructs us to stop parsing right here
7154 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08007155 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007156 return;
7157 }
7158 }
7159 if (ctxt->myDoc == NULL) {
7160 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007161 if (ctxt->myDoc == NULL) {
7162 xmlErrMemory(ctxt, "New Doc failed");
7163 return;
7164 }
7165 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007166 }
7167 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7168 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7169
7170 ctxt->instate = XML_PARSER_DTD;
7171 ctxt->external = 1;
7172 while (((RAW == '<') && (NXT(1) == '?')) ||
7173 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007174 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007175 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007176 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007177
7178 GROW;
7179 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7180 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007181 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007182 NEXT;
7183 } else if (RAW == '%') {
7184 xmlParsePEReference(ctxt);
7185 } else
7186 xmlParseMarkupDecl(ctxt);
7187
7188 /*
7189 * Pop-up of finished entities.
7190 */
7191 while ((RAW == 0) && (ctxt->inputNr > 1))
7192 xmlPopInput(ctxt);
7193
Daniel Veillardfdc91562002-07-01 21:52:03 +00007194 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007195 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007196 break;
7197 }
7198 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007199
Owen Taylor3473f882001-02-23 17:55:21 +00007200 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007201 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007202 }
7203
7204}
7205
7206/**
7207 * xmlParseReference:
7208 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007209 *
Owen Taylor3473f882001-02-23 17:55:21 +00007210 * parse and handle entity references in content, depending on the SAX
7211 * interface, this may end-up in a call to character() if this is a
7212 * CharRef, a predefined entity, if there is no reference() callback.
7213 * or if the parser was asked to switch to that mode.
7214 *
7215 * [67] Reference ::= EntityRef | CharRef
7216 */
7217void
7218xmlParseReference(xmlParserCtxtPtr ctxt) {
7219 xmlEntityPtr ent;
7220 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007221 int was_checked;
7222 xmlNodePtr list = NULL;
7223 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007224
Daniel Veillard0161e632008-08-28 15:36:32 +00007225
7226 if (RAW != '&')
7227 return;
7228
7229 /*
7230 * Simple case of a CharRef
7231 */
Owen Taylor3473f882001-02-23 17:55:21 +00007232 if (NXT(1) == '#') {
7233 int i = 0;
7234 xmlChar out[10];
7235 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007236 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007237
Daniel Veillarddc171602008-03-26 17:41:38 +00007238 if (value == 0)
7239 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007240 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7241 /*
7242 * So we are using non-UTF-8 buffers
7243 * Check that the char fit on 8bits, if not
7244 * generate a CharRef.
7245 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007246 if (value <= 0xFF) {
7247 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007248 out[1] = 0;
7249 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7250 (!ctxt->disableSAX))
7251 ctxt->sax->characters(ctxt->userData, out, 1);
7252 } else {
7253 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007254 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007255 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007256 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007257 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7258 (!ctxt->disableSAX))
7259 ctxt->sax->reference(ctxt->userData, out);
7260 }
7261 } else {
7262 /*
7263 * Just encode the value in UTF-8
7264 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007265 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007266 out[i] = 0;
7267 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7268 (!ctxt->disableSAX))
7269 ctxt->sax->characters(ctxt->userData, out, i);
7270 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007271 return;
7272 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007273
Daniel Veillard0161e632008-08-28 15:36:32 +00007274 /*
7275 * We are seeing an entity reference
7276 */
7277 ent = xmlParseEntityRef(ctxt);
7278 if (ent == NULL) return;
7279 if (!ctxt->wellFormed)
7280 return;
7281 was_checked = ent->checked;
7282
7283 /* special case of predefined entities */
7284 if ((ent->name == NULL) ||
7285 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7286 val = ent->content;
7287 if (val == NULL) return;
7288 /*
7289 * inline the entity.
7290 */
7291 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7292 (!ctxt->disableSAX))
7293 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7294 return;
7295 }
7296
7297 /*
7298 * The first reference to the entity trigger a parsing phase
7299 * where the ent->children is filled with the result from
7300 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007301 * Note: external parsed entities will not be loaded, it is not
7302 * required for a non-validating parser, unless the parsing option
7303 * of validating, or substituting entities were given. Doing so is
7304 * far more secure as the parser will only process data coming from
7305 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007306 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007307 if (((ent->checked == 0) ||
7308 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007309 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7310 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007311 unsigned long oldnbent = ctxt->nbentities;
7312
7313 /*
7314 * This is a bit hackish but this seems the best
7315 * way to make sure both SAX and DOM entity support
7316 * behaves okay.
7317 */
7318 void *user_data;
7319 if (ctxt->userData == ctxt)
7320 user_data = NULL;
7321 else
7322 user_data = ctxt->userData;
7323
7324 /*
7325 * Check that this entity is well formed
7326 * 4.3.2: An internal general parsed entity is well-formed
7327 * if its replacement text matches the production labeled
7328 * content.
7329 */
7330 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7331 ctxt->depth++;
7332 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7333 user_data, &list);
7334 ctxt->depth--;
7335
7336 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7337 ctxt->depth++;
7338 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7339 user_data, ctxt->depth, ent->URI,
7340 ent->ExternalID, &list);
7341 ctxt->depth--;
7342 } else {
7343 ret = XML_ERR_ENTITY_PE_INTERNAL;
7344 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7345 "invalid entity type found\n", NULL);
7346 }
7347
7348 /*
7349 * Store the number of entities needing parsing for this entity
7350 * content and do checkings
7351 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007352 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7353 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7354 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007355 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007356 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007357 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007358 return;
7359 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007360 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007361 xmlFreeNodeList(list);
7362 return;
7363 }
Owen Taylor3473f882001-02-23 17:55:21 +00007364
Daniel Veillard0161e632008-08-28 15:36:32 +00007365 if ((ret == XML_ERR_OK) && (list != NULL)) {
7366 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7367 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7368 (ent->children == NULL)) {
7369 ent->children = list;
7370 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007371 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007372 * Prune it directly in the generated document
7373 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007374 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007375 if (((list->type == XML_TEXT_NODE) &&
7376 (list->next == NULL)) ||
7377 (ctxt->parseMode == XML_PARSE_READER)) {
7378 list->parent = (xmlNodePtr) ent;
7379 list = NULL;
7380 ent->owner = 1;
7381 } else {
7382 ent->owner = 0;
7383 while (list != NULL) {
7384 list->parent = (xmlNodePtr) ctxt->node;
7385 list->doc = ctxt->myDoc;
7386 if (list->next == NULL)
7387 ent->last = list;
7388 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007389 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007390 list = ent->children;
7391#ifdef LIBXML_LEGACY_ENABLED
7392 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7393 xmlAddEntityReference(ent, list, NULL);
7394#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007395 }
7396 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007397 ent->owner = 1;
7398 while (list != NULL) {
7399 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007400 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007401 if (list->next == NULL)
7402 ent->last = list;
7403 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007404 }
7405 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007406 } else {
7407 xmlFreeNodeList(list);
7408 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007409 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007410 } else if ((ret != XML_ERR_OK) &&
7411 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7412 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7413 "Entity '%s' failed to parse\n", ent->name);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007414 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007415 } else if (list != NULL) {
7416 xmlFreeNodeList(list);
7417 list = NULL;
7418 }
7419 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007420 ent->checked = 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007421 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007422 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007423 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007424
Daniel Veillard0161e632008-08-28 15:36:32 +00007425 /*
7426 * Now that the entity content has been gathered
7427 * provide it to the application, this can take different forms based
7428 * on the parsing modes.
7429 */
7430 if (ent->children == NULL) {
7431 /*
7432 * Probably running in SAX mode and the callbacks don't
7433 * build the entity content. So unless we already went
7434 * though parsing for first checking go though the entity
7435 * content to generate callbacks associated to the entity
7436 */
7437 if (was_checked != 0) {
7438 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007439 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007440 * This is a bit hackish but this seems the best
7441 * way to make sure both SAX and DOM entity support
7442 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007443 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007444 if (ctxt->userData == ctxt)
7445 user_data = NULL;
7446 else
7447 user_data = ctxt->userData;
7448
7449 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7450 ctxt->depth++;
7451 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7452 ent->content, user_data, NULL);
7453 ctxt->depth--;
7454 } else if (ent->etype ==
7455 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7456 ctxt->depth++;
7457 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7458 ctxt->sax, user_data, ctxt->depth,
7459 ent->URI, ent->ExternalID, NULL);
7460 ctxt->depth--;
7461 } else {
7462 ret = XML_ERR_ENTITY_PE_INTERNAL;
7463 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7464 "invalid entity type found\n", NULL);
7465 }
7466 if (ret == XML_ERR_ENTITY_LOOP) {
7467 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7468 return;
7469 }
7470 }
7471 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7472 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7473 /*
7474 * Entity reference callback comes second, it's somewhat
7475 * superfluous but a compatibility to historical behaviour
7476 */
7477 ctxt->sax->reference(ctxt->userData, ent->name);
7478 }
7479 return;
7480 }
7481
7482 /*
7483 * If we didn't get any children for the entity being built
7484 */
7485 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7486 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7487 /*
7488 * Create a node.
7489 */
7490 ctxt->sax->reference(ctxt->userData, ent->name);
7491 return;
7492 }
7493
7494 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7495 /*
7496 * There is a problem on the handling of _private for entities
7497 * (bug 155816): Should we copy the content of the field from
7498 * the entity (possibly overwriting some value set by the user
7499 * when a copy is created), should we leave it alone, or should
7500 * we try to take care of different situations? The problem
7501 * is exacerbated by the usage of this field by the xmlReader.
7502 * To fix this bug, we look at _private on the created node
7503 * and, if it's NULL, we copy in whatever was in the entity.
7504 * If it's not NULL we leave it alone. This is somewhat of a
7505 * hack - maybe we should have further tests to determine
7506 * what to do.
7507 */
7508 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7509 /*
7510 * Seems we are generating the DOM content, do
7511 * a simple tree copy for all references except the first
7512 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007513 */
7514 if (((list == NULL) && (ent->owner == 0)) ||
7515 (ctxt->parseMode == XML_PARSE_READER)) {
7516 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7517
7518 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007519 * We are copying here, make sure there is no abuse
7520 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007521 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007522 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7523 return;
7524
7525 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007526 * when operating on a reader, the entities definitions
7527 * are always owning the entities subtree.
7528 if (ctxt->parseMode == XML_PARSE_READER)
7529 ent->owner = 1;
7530 */
7531
7532 cur = ent->children;
7533 while (cur != NULL) {
7534 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7535 if (nw != NULL) {
7536 if (nw->_private == NULL)
7537 nw->_private = cur->_private;
7538 if (firstChild == NULL){
7539 firstChild = nw;
7540 }
7541 nw = xmlAddChild(ctxt->node, nw);
7542 }
7543 if (cur == ent->last) {
7544 /*
7545 * needed to detect some strange empty
7546 * node cases in the reader tests
7547 */
7548 if ((ctxt->parseMode == XML_PARSE_READER) &&
7549 (nw != NULL) &&
7550 (nw->type == XML_ELEMENT_NODE) &&
7551 (nw->children == NULL))
7552 nw->extra = 1;
7553
7554 break;
7555 }
7556 cur = cur->next;
7557 }
7558#ifdef LIBXML_LEGACY_ENABLED
7559 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7560 xmlAddEntityReference(ent, firstChild, nw);
7561#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007562 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007563 xmlNodePtr nw = NULL, cur, next, last,
7564 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007565
7566 /*
7567 * We are copying here, make sure there is no abuse
7568 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007569 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007570 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7571 return;
7572
Daniel Veillard0161e632008-08-28 15:36:32 +00007573 /*
7574 * Copy the entity child list and make it the new
7575 * entity child list. The goal is to make sure any
7576 * ID or REF referenced will be the one from the
7577 * document content and not the entity copy.
7578 */
7579 cur = ent->children;
7580 ent->children = NULL;
7581 last = ent->last;
7582 ent->last = NULL;
7583 while (cur != NULL) {
7584 next = cur->next;
7585 cur->next = NULL;
7586 cur->parent = NULL;
7587 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7588 if (nw != NULL) {
7589 if (nw->_private == NULL)
7590 nw->_private = cur->_private;
7591 if (firstChild == NULL){
7592 firstChild = cur;
7593 }
7594 xmlAddChild((xmlNodePtr) ent, nw);
7595 xmlAddChild(ctxt->node, cur);
7596 }
7597 if (cur == last)
7598 break;
7599 cur = next;
7600 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007601 if (ent->owner == 0)
7602 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007603#ifdef LIBXML_LEGACY_ENABLED
7604 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7605 xmlAddEntityReference(ent, firstChild, nw);
7606#endif /* LIBXML_LEGACY_ENABLED */
7607 } else {
7608 const xmlChar *nbktext;
7609
7610 /*
7611 * the name change is to avoid coalescing of the
7612 * node with a possible previous text one which
7613 * would make ent->children a dangling pointer
7614 */
7615 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7616 -1);
7617 if (ent->children->type == XML_TEXT_NODE)
7618 ent->children->name = nbktext;
7619 if ((ent->last != ent->children) &&
7620 (ent->last->type == XML_TEXT_NODE))
7621 ent->last->name = nbktext;
7622 xmlAddChildList(ctxt->node, ent->children);
7623 }
7624
7625 /*
7626 * This is to avoid a nasty side effect, see
7627 * characters() in SAX.c
7628 */
7629 ctxt->nodemem = 0;
7630 ctxt->nodelen = 0;
7631 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007632 }
7633 }
7634}
7635
7636/**
7637 * xmlParseEntityRef:
7638 * @ctxt: an XML parser context
7639 *
7640 * parse ENTITY references declarations
7641 *
7642 * [68] EntityRef ::= '&' Name ';'
7643 *
7644 * [ WFC: Entity Declared ]
7645 * In a document without any DTD, a document with only an internal DTD
7646 * subset which contains no parameter entity references, or a document
7647 * with "standalone='yes'", the Name given in the entity reference
7648 * must match that in an entity declaration, except that well-formed
7649 * documents need not declare any of the following entities: amp, lt,
7650 * gt, apos, quot. The declaration of a parameter entity must precede
7651 * any reference to it. Similarly, the declaration of a general entity
7652 * must precede any reference to it which appears in a default value in an
7653 * attribute-list declaration. Note that if entities are declared in the
7654 * external subset or in external parameter entities, a non-validating
7655 * processor is not obligated to read and process their declarations;
7656 * for such documents, the rule that an entity must be declared is a
7657 * well-formedness constraint only if standalone='yes'.
7658 *
7659 * [ WFC: Parsed Entity ]
7660 * An entity reference must not contain the name of an unparsed entity
7661 *
7662 * Returns the xmlEntityPtr if found, or NULL otherwise.
7663 */
7664xmlEntityPtr
7665xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007666 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007667 xmlEntityPtr ent = NULL;
7668
7669 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007670 if (ctxt->instate == XML_PARSER_EOF)
7671 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007672
Daniel Veillard0161e632008-08-28 15:36:32 +00007673 if (RAW != '&')
7674 return(NULL);
7675 NEXT;
7676 name = xmlParseName(ctxt);
7677 if (name == NULL) {
7678 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7679 "xmlParseEntityRef: no name\n");
7680 return(NULL);
7681 }
7682 if (RAW != ';') {
7683 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7684 return(NULL);
7685 }
7686 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007687
Daniel Veillard0161e632008-08-28 15:36:32 +00007688 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007689 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007690 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007691 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7692 ent = xmlGetPredefinedEntity(name);
7693 if (ent != NULL)
7694 return(ent);
7695 }
Owen Taylor3473f882001-02-23 17:55:21 +00007696
Daniel Veillard0161e632008-08-28 15:36:32 +00007697 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007698 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007699 */
7700 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007701
Daniel Veillard0161e632008-08-28 15:36:32 +00007702 /*
7703 * Ask first SAX for entity resolution, otherwise try the
7704 * entities which may have stored in the parser context.
7705 */
7706 if (ctxt->sax != NULL) {
7707 if (ctxt->sax->getEntity != NULL)
7708 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007709 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007710 (ctxt->options & XML_PARSE_OLDSAX))
7711 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007712 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7713 (ctxt->userData==ctxt)) {
7714 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007715 }
7716 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007717 if (ctxt->instate == XML_PARSER_EOF)
7718 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007719 /*
7720 * [ WFC: Entity Declared ]
7721 * In a document without any DTD, a document with only an
7722 * internal DTD subset which contains no parameter entity
7723 * references, or a document with "standalone='yes'", the
7724 * Name given in the entity reference must match that in an
7725 * entity declaration, except that well-formed documents
7726 * need not declare any of the following entities: amp, lt,
7727 * gt, apos, quot.
7728 * The declaration of a parameter entity must precede any
7729 * reference to it.
7730 * Similarly, the declaration of a general entity must
7731 * precede any reference to it which appears in a default
7732 * value in an attribute-list declaration. Note that if
7733 * entities are declared in the external subset or in
7734 * external parameter entities, a non-validating processor
7735 * is not obligated to read and process their declarations;
7736 * for such documents, the rule that an entity must be
7737 * declared is a well-formedness constraint only if
7738 * standalone='yes'.
7739 */
7740 if (ent == NULL) {
7741 if ((ctxt->standalone == 1) ||
7742 ((ctxt->hasExternalSubset == 0) &&
7743 (ctxt->hasPErefs == 0))) {
7744 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7745 "Entity '%s' not defined\n", name);
7746 } else {
7747 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7748 "Entity '%s' not defined\n", name);
7749 if ((ctxt->inSubset == 0) &&
7750 (ctxt->sax != NULL) &&
7751 (ctxt->sax->reference != NULL)) {
7752 ctxt->sax->reference(ctxt->userData, name);
7753 }
7754 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007755 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007756 ctxt->valid = 0;
7757 }
7758
7759 /*
7760 * [ WFC: Parsed Entity ]
7761 * An entity reference must not contain the name of an
7762 * unparsed entity
7763 */
7764 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7765 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7766 "Entity reference to unparsed entity %s\n", name);
7767 }
7768
7769 /*
7770 * [ WFC: No External Entity References ]
7771 * Attribute values cannot contain direct or indirect
7772 * entity references to external entities.
7773 */
7774 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7775 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7776 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7777 "Attribute references external entity '%s'\n", name);
7778 }
7779 /*
7780 * [ WFC: No < in Attribute Values ]
7781 * The replacement text of any entity referred to directly or
7782 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007783 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007784 */
7785 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007786 (ent != NULL) &&
7787 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007788 if (((ent->checked & 1) || (ent->checked == 0)) &&
7789 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007790 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7791 "'<' in entity '%s' is not allowed in attributes values\n", name);
7792 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007793 }
7794
7795 /*
7796 * Internal check, no parameter entities here ...
7797 */
7798 else {
7799 switch (ent->etype) {
7800 case XML_INTERNAL_PARAMETER_ENTITY:
7801 case XML_EXTERNAL_PARAMETER_ENTITY:
7802 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7803 "Attempt to reference the parameter entity '%s'\n",
7804 name);
7805 break;
7806 default:
7807 break;
7808 }
7809 }
7810
7811 /*
7812 * [ WFC: No Recursion ]
7813 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007814 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007815 * Done somewhere else
7816 */
Owen Taylor3473f882001-02-23 17:55:21 +00007817 return(ent);
7818}
7819
7820/**
7821 * xmlParseStringEntityRef:
7822 * @ctxt: an XML parser context
7823 * @str: a pointer to an index in the string
7824 *
7825 * parse ENTITY references declarations, but this version parses it from
7826 * a string value.
7827 *
7828 * [68] EntityRef ::= '&' Name ';'
7829 *
7830 * [ WFC: Entity Declared ]
7831 * In a document without any DTD, a document with only an internal DTD
7832 * subset which contains no parameter entity references, or a document
7833 * with "standalone='yes'", the Name given in the entity reference
7834 * must match that in an entity declaration, except that well-formed
7835 * documents need not declare any of the following entities: amp, lt,
7836 * gt, apos, quot. The declaration of a parameter entity must precede
7837 * any reference to it. Similarly, the declaration of a general entity
7838 * must precede any reference to it which appears in a default value in an
7839 * attribute-list declaration. Note that if entities are declared in the
7840 * external subset or in external parameter entities, a non-validating
7841 * processor is not obligated to read and process their declarations;
7842 * for such documents, the rule that an entity must be declared is a
7843 * well-formedness constraint only if standalone='yes'.
7844 *
7845 * [ WFC: Parsed Entity ]
7846 * An entity reference must not contain the name of an unparsed entity
7847 *
7848 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7849 * is updated to the current location in the string.
7850 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007851static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007852xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7853 xmlChar *name;
7854 const xmlChar *ptr;
7855 xmlChar cur;
7856 xmlEntityPtr ent = NULL;
7857
7858 if ((str == NULL) || (*str == NULL))
7859 return(NULL);
7860 ptr = *str;
7861 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007862 if (cur != '&')
7863 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007864
Daniel Veillard0161e632008-08-28 15:36:32 +00007865 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007866 name = xmlParseStringName(ctxt, &ptr);
7867 if (name == NULL) {
7868 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7869 "xmlParseStringEntityRef: no name\n");
7870 *str = ptr;
7871 return(NULL);
7872 }
7873 if (*ptr != ';') {
7874 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007875 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007876 *str = ptr;
7877 return(NULL);
7878 }
7879 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007880
Owen Taylor3473f882001-02-23 17:55:21 +00007881
Daniel Veillard0161e632008-08-28 15:36:32 +00007882 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007883 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007884 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007885 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7886 ent = xmlGetPredefinedEntity(name);
7887 if (ent != NULL) {
7888 xmlFree(name);
7889 *str = ptr;
7890 return(ent);
7891 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007892 }
Owen Taylor3473f882001-02-23 17:55:21 +00007893
Daniel Veillard0161e632008-08-28 15:36:32 +00007894 /*
7895 * Increate the number of entity references parsed
7896 */
7897 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007898
Daniel Veillard0161e632008-08-28 15:36:32 +00007899 /*
7900 * Ask first SAX for entity resolution, otherwise try the
7901 * entities which may have stored in the parser context.
7902 */
7903 if (ctxt->sax != NULL) {
7904 if (ctxt->sax->getEntity != NULL)
7905 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007906 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7907 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007908 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7909 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007910 }
7911 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007912 if (ctxt->instate == XML_PARSER_EOF) {
7913 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007914 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007915 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007916
7917 /*
7918 * [ WFC: Entity Declared ]
7919 * In a document without any DTD, a document with only an
7920 * internal DTD subset which contains no parameter entity
7921 * references, or a document with "standalone='yes'", the
7922 * Name given in the entity reference must match that in an
7923 * entity declaration, except that well-formed documents
7924 * need not declare any of the following entities: amp, lt,
7925 * gt, apos, quot.
7926 * The declaration of a parameter entity must precede any
7927 * reference to it.
7928 * Similarly, the declaration of a general entity must
7929 * precede any reference to it which appears in a default
7930 * value in an attribute-list declaration. Note that if
7931 * entities are declared in the external subset or in
7932 * external parameter entities, a non-validating processor
7933 * is not obligated to read and process their declarations;
7934 * for such documents, the rule that an entity must be
7935 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007936 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007937 */
7938 if (ent == NULL) {
7939 if ((ctxt->standalone == 1) ||
7940 ((ctxt->hasExternalSubset == 0) &&
7941 (ctxt->hasPErefs == 0))) {
7942 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7943 "Entity '%s' not defined\n", name);
7944 } else {
7945 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7946 "Entity '%s' not defined\n",
7947 name);
7948 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007949 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007950 /* TODO ? check regressions ctxt->valid = 0; */
7951 }
7952
7953 /*
7954 * [ WFC: Parsed Entity ]
7955 * An entity reference must not contain the name of an
7956 * unparsed entity
7957 */
7958 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7959 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7960 "Entity reference to unparsed entity %s\n", name);
7961 }
7962
7963 /*
7964 * [ WFC: No External Entity References ]
7965 * Attribute values cannot contain direct or indirect
7966 * entity references to external entities.
7967 */
7968 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7969 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7970 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7971 "Attribute references external entity '%s'\n", name);
7972 }
7973 /*
7974 * [ WFC: No < in Attribute Values ]
7975 * The replacement text of any entity referred to directly or
7976 * indirectly in an attribute value (other than "&lt;") must
7977 * not contain a <.
7978 */
7979 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7980 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007981 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007982 (xmlStrchr(ent->content, '<'))) {
7983 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7984 "'<' in entity '%s' is not allowed in attributes values\n",
7985 name);
7986 }
7987
7988 /*
7989 * Internal check, no parameter entities here ...
7990 */
7991 else {
7992 switch (ent->etype) {
7993 case XML_INTERNAL_PARAMETER_ENTITY:
7994 case XML_EXTERNAL_PARAMETER_ENTITY:
7995 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7996 "Attempt to reference the parameter entity '%s'\n",
7997 name);
7998 break;
7999 default:
8000 break;
8001 }
8002 }
8003
8004 /*
8005 * [ WFC: No Recursion ]
8006 * A parsed entity must not contain a recursive reference
8007 * to itself, either directly or indirectly.
8008 * Done somewhere else
8009 */
8010
8011 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008012 *str = ptr;
8013 return(ent);
8014}
8015
8016/**
8017 * xmlParsePEReference:
8018 * @ctxt: an XML parser context
8019 *
8020 * parse PEReference declarations
8021 * The entity content is handled directly by pushing it's content as
8022 * a new input stream.
8023 *
8024 * [69] PEReference ::= '%' Name ';'
8025 *
8026 * [ WFC: No Recursion ]
8027 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008028 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008029 *
8030 * [ WFC: Entity Declared ]
8031 * In a document without any DTD, a document with only an internal DTD
8032 * subset which contains no parameter entity references, or a document
8033 * with "standalone='yes'", ... ... The declaration of a parameter
8034 * entity must precede any reference to it...
8035 *
8036 * [ VC: Entity Declared ]
8037 * In a document with an external subset or external parameter entities
8038 * with "standalone='no'", ... ... The declaration of a parameter entity
8039 * must precede any reference to it...
8040 *
8041 * [ WFC: In DTD ]
8042 * Parameter-entity references may only appear in the DTD.
8043 * NOTE: misleading but this is handled.
8044 */
8045void
Daniel Veillard8f597c32003-10-06 08:19:27 +00008046xmlParsePEReference(xmlParserCtxtPtr ctxt)
8047{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008048 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008049 xmlEntityPtr entity = NULL;
8050 xmlParserInputPtr input;
8051
Daniel Veillard0161e632008-08-28 15:36:32 +00008052 if (RAW != '%')
8053 return;
8054 NEXT;
8055 name = xmlParseName(ctxt);
8056 if (name == NULL) {
8057 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8058 "xmlParsePEReference: no name\n");
8059 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008060 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008061 if (RAW != ';') {
8062 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8063 return;
8064 }
8065
8066 NEXT;
8067
8068 /*
8069 * Increate the number of entity references parsed
8070 */
8071 ctxt->nbentities++;
8072
8073 /*
8074 * Request the entity from SAX
8075 */
8076 if ((ctxt->sax != NULL) &&
8077 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008078 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8079 if (ctxt->instate == XML_PARSER_EOF)
8080 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00008081 if (entity == NULL) {
8082 /*
8083 * [ WFC: Entity Declared ]
8084 * In a document without any DTD, a document with only an
8085 * internal DTD subset which contains no parameter entity
8086 * references, or a document with "standalone='yes'", ...
8087 * ... The declaration of a parameter entity must precede
8088 * any reference to it...
8089 */
8090 if ((ctxt->standalone == 1) ||
8091 ((ctxt->hasExternalSubset == 0) &&
8092 (ctxt->hasPErefs == 0))) {
8093 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8094 "PEReference: %%%s; not found\n",
8095 name);
8096 } else {
8097 /*
8098 * [ VC: Entity Declared ]
8099 * In a document with an external subset or external
8100 * parameter entities with "standalone='no'", ...
8101 * ... The declaration of a parameter entity must
8102 * precede any reference to it...
8103 */
8104 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8105 "PEReference: %%%s; not found\n",
8106 name, NULL);
8107 ctxt->valid = 0;
8108 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008109 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008110 } else {
8111 /*
8112 * Internal checking in case the entity quest barfed
8113 */
8114 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8115 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8116 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8117 "Internal: %%%s; is not a parameter entity\n",
8118 name, NULL);
8119 } else if (ctxt->input->free != deallocblankswrapper) {
8120 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8121 if (xmlPushInput(ctxt, input) < 0)
8122 return;
8123 } else {
Neel Mehta90ccb582017-04-07 17:43:02 +02008124 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8125 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8126 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8127 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8128 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8129 (ctxt->replaceEntities == 0) &&
8130 (ctxt->validate == 0))
8131 return;
8132
Daniel Veillard0161e632008-08-28 15:36:32 +00008133 /*
8134 * TODO !!!
8135 * handle the extra spaces added before and after
8136 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8137 */
8138 input = xmlNewEntityInputStream(ctxt, entity);
8139 if (xmlPushInput(ctxt, input) < 0)
8140 return;
8141 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8142 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8143 (IS_BLANK_CH(NXT(5)))) {
8144 xmlParseTextDecl(ctxt);
8145 if (ctxt->errNo ==
8146 XML_ERR_UNSUPPORTED_ENCODING) {
8147 /*
8148 * The XML REC instructs us to stop parsing
8149 * right here
8150 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08008151 xmlHaltParser(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00008152 return;
8153 }
8154 }
8155 }
8156 }
8157 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008158}
8159
8160/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008161 * xmlLoadEntityContent:
8162 * @ctxt: an XML parser context
8163 * @entity: an unloaded system entity
8164 *
8165 * Load the original content of the given system entity from the
8166 * ExternalID/SystemID given. This is to be used for Included in Literal
8167 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8168 *
8169 * Returns 0 in case of success and -1 in case of failure
8170 */
8171static int
8172xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8173 xmlParserInputPtr input;
8174 xmlBufferPtr buf;
8175 int l, c;
8176 int count = 0;
8177
8178 if ((ctxt == NULL) || (entity == NULL) ||
8179 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8180 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8181 (entity->content != NULL)) {
8182 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8183 "xmlLoadEntityContent parameter error");
8184 return(-1);
8185 }
8186
8187 if (xmlParserDebugEntities)
8188 xmlGenericError(xmlGenericErrorContext,
8189 "Reading %s entity content input\n", entity->name);
8190
8191 buf = xmlBufferCreate();
8192 if (buf == NULL) {
8193 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8194 "xmlLoadEntityContent parameter error");
8195 return(-1);
8196 }
8197
8198 input = xmlNewEntityInputStream(ctxt, entity);
8199 if (input == NULL) {
8200 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8201 "xmlLoadEntityContent input error");
8202 xmlBufferFree(buf);
8203 return(-1);
8204 }
8205
8206 /*
8207 * Push the entity as the current input, read char by char
8208 * saving to the buffer until the end of the entity or an error
8209 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008210 if (xmlPushInput(ctxt, input) < 0) {
8211 xmlBufferFree(buf);
8212 return(-1);
8213 }
8214
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008215 GROW;
8216 c = CUR_CHAR(l);
8217 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8218 (IS_CHAR(c))) {
8219 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008220 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008221 count = 0;
8222 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008223 if (ctxt->instate == XML_PARSER_EOF) {
8224 xmlBufferFree(buf);
8225 return(-1);
8226 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008227 }
8228 NEXTL(l);
8229 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008230 if (c == 0) {
8231 count = 0;
8232 GROW;
8233 if (ctxt->instate == XML_PARSER_EOF) {
8234 xmlBufferFree(buf);
8235 return(-1);
8236 }
8237 c = CUR_CHAR(l);
8238 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008239 }
8240
8241 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8242 xmlPopInput(ctxt);
8243 } else if (!IS_CHAR(c)) {
8244 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8245 "xmlLoadEntityContent: invalid char value %d\n",
8246 c);
8247 xmlBufferFree(buf);
8248 return(-1);
8249 }
8250 entity->content = buf->content;
8251 buf->content = NULL;
8252 xmlBufferFree(buf);
8253
8254 return(0);
8255}
8256
8257/**
Owen Taylor3473f882001-02-23 17:55:21 +00008258 * xmlParseStringPEReference:
8259 * @ctxt: an XML parser context
8260 * @str: a pointer to an index in the string
8261 *
8262 * parse PEReference declarations
8263 *
8264 * [69] PEReference ::= '%' Name ';'
8265 *
8266 * [ WFC: No Recursion ]
8267 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008268 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008269 *
8270 * [ WFC: Entity Declared ]
8271 * In a document without any DTD, a document with only an internal DTD
8272 * subset which contains no parameter entity references, or a document
8273 * with "standalone='yes'", ... ... The declaration of a parameter
8274 * entity must precede any reference to it...
8275 *
8276 * [ VC: Entity Declared ]
8277 * In a document with an external subset or external parameter entities
8278 * with "standalone='no'", ... ... The declaration of a parameter entity
8279 * must precede any reference to it...
8280 *
8281 * [ WFC: In DTD ]
8282 * Parameter-entity references may only appear in the DTD.
8283 * NOTE: misleading but this is handled.
8284 *
8285 * Returns the string of the entity content.
8286 * str is updated to the current value of the index
8287 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008288static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008289xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8290 const xmlChar *ptr;
8291 xmlChar cur;
8292 xmlChar *name;
8293 xmlEntityPtr entity = NULL;
8294
8295 if ((str == NULL) || (*str == NULL)) return(NULL);
8296 ptr = *str;
8297 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008298 if (cur != '%')
8299 return(NULL);
8300 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008301 name = xmlParseStringName(ctxt, &ptr);
8302 if (name == NULL) {
8303 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8304 "xmlParseStringPEReference: no name\n");
8305 *str = ptr;
8306 return(NULL);
8307 }
8308 cur = *ptr;
8309 if (cur != ';') {
8310 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8311 xmlFree(name);
8312 *str = ptr;
8313 return(NULL);
8314 }
8315 ptr++;
8316
8317 /*
8318 * Increate the number of entity references parsed
8319 */
8320 ctxt->nbentities++;
8321
8322 /*
8323 * Request the entity from SAX
8324 */
8325 if ((ctxt->sax != NULL) &&
8326 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008327 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8328 if (ctxt->instate == XML_PARSER_EOF) {
8329 xmlFree(name);
Nick Wellnhoferfb2f5182017-06-10 17:06:16 +02008330 *str = ptr;
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008331 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008332 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008333 if (entity == NULL) {
8334 /*
8335 * [ WFC: Entity Declared ]
8336 * In a document without any DTD, a document with only an
8337 * internal DTD subset which contains no parameter entity
8338 * references, or a document with "standalone='yes'", ...
8339 * ... The declaration of a parameter entity must precede
8340 * any reference to it...
8341 */
8342 if ((ctxt->standalone == 1) ||
8343 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8344 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8345 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008346 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008347 /*
8348 * [ VC: Entity Declared ]
8349 * In a document with an external subset or external
8350 * parameter entities with "standalone='no'", ...
8351 * ... The declaration of a parameter entity must
8352 * precede any reference to it...
8353 */
8354 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8355 "PEReference: %%%s; not found\n",
8356 name, NULL);
8357 ctxt->valid = 0;
8358 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008359 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008360 } else {
8361 /*
8362 * Internal checking in case the entity quest barfed
8363 */
8364 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8365 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8366 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8367 "%%%s; is not a parameter entity\n",
8368 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008369 }
8370 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008371 ctxt->hasPErefs = 1;
8372 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008373 *str = ptr;
8374 return(entity);
8375}
8376
8377/**
8378 * xmlParseDocTypeDecl:
8379 * @ctxt: an XML parser context
8380 *
8381 * parse a DOCTYPE declaration
8382 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008383 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008384 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8385 *
8386 * [ VC: Root Element Type ]
8387 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008388 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008389 */
8390
8391void
8392xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008393 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008394 xmlChar *ExternalID = NULL;
8395 xmlChar *URI = NULL;
8396
8397 /*
8398 * We know that '<!DOCTYPE' has been detected.
8399 */
8400 SKIP(9);
8401
8402 SKIP_BLANKS;
8403
8404 /*
8405 * Parse the DOCTYPE name.
8406 */
8407 name = xmlParseName(ctxt);
8408 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008409 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8410 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008411 }
8412 ctxt->intSubName = name;
8413
8414 SKIP_BLANKS;
8415
8416 /*
8417 * Check for SystemID and ExternalID
8418 */
8419 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8420
8421 if ((URI != NULL) || (ExternalID != NULL)) {
8422 ctxt->hasExternalSubset = 1;
8423 }
8424 ctxt->extSubURI = URI;
8425 ctxt->extSubSystem = ExternalID;
8426
8427 SKIP_BLANKS;
8428
8429 /*
8430 * Create and update the internal subset.
8431 */
8432 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8433 (!ctxt->disableSAX))
8434 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008435 if (ctxt->instate == XML_PARSER_EOF)
8436 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008437
8438 /*
8439 * Is there any internal subset declarations ?
8440 * they are handled separately in xmlParseInternalSubset()
8441 */
8442 if (RAW == '[')
8443 return;
8444
8445 /*
8446 * We should be at the end of the DOCTYPE declaration.
8447 */
8448 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008449 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008450 }
8451 NEXT;
8452}
8453
8454/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008455 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008456 * @ctxt: an XML parser context
8457 *
8458 * parse the internal subset declaration
8459 *
8460 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8461 */
8462
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008463static void
Owen Taylor3473f882001-02-23 17:55:21 +00008464xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8465 /*
8466 * Is there any DTD definition ?
8467 */
8468 if (RAW == '[') {
8469 ctxt->instate = XML_PARSER_DTD;
8470 NEXT;
8471 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008472 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008473 * PEReferences.
8474 * Subsequence (markupdecl | PEReference | S)*
8475 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008476 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008477 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008478 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008479
8480 SKIP_BLANKS;
8481 xmlParseMarkupDecl(ctxt);
8482 xmlParsePEReference(ctxt);
8483
8484 /*
8485 * Pop-up of finished entities.
8486 */
8487 while ((RAW == 0) && (ctxt->inputNr > 1))
8488 xmlPopInput(ctxt);
8489
8490 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008491 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008492 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008493 break;
8494 }
8495 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008496 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008497 NEXT;
8498 SKIP_BLANKS;
8499 }
8500 }
8501
8502 /*
8503 * We should be at the end of the DOCTYPE declaration.
8504 */
8505 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008506 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Daniel Veillarda7a94612016-02-09 12:55:29 +01008507 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008508 }
8509 NEXT;
8510}
8511
Daniel Veillard81273902003-09-30 00:43:48 +00008512#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008513/**
8514 * xmlParseAttribute:
8515 * @ctxt: an XML parser context
8516 * @value: a xmlChar ** used to store the value of the attribute
8517 *
8518 * parse an attribute
8519 *
8520 * [41] Attribute ::= Name Eq AttValue
8521 *
8522 * [ WFC: No External Entity References ]
8523 * Attribute values cannot contain direct or indirect entity references
8524 * to external entities.
8525 *
8526 * [ WFC: No < in Attribute Values ]
8527 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008528 * an attribute value (other than "&lt;") must not contain a <.
8529 *
Owen Taylor3473f882001-02-23 17:55:21 +00008530 * [ VC: Attribute Value Type ]
8531 * The attribute must have been declared; the value must be of the type
8532 * declared for it.
8533 *
8534 * [25] Eq ::= S? '=' S?
8535 *
8536 * With namespace:
8537 *
8538 * [NS 11] Attribute ::= QName Eq AttValue
8539 *
8540 * Also the case QName == xmlns:??? is handled independently as a namespace
8541 * definition.
8542 *
8543 * Returns the attribute name, and the value in *value.
8544 */
8545
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008546const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008547xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008548 const xmlChar *name;
8549 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008550
8551 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008552 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008553 name = xmlParseName(ctxt);
8554 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008555 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008556 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008557 return(NULL);
8558 }
8559
8560 /*
8561 * read the value
8562 */
8563 SKIP_BLANKS;
8564 if (RAW == '=') {
8565 NEXT;
8566 SKIP_BLANKS;
8567 val = xmlParseAttValue(ctxt);
8568 ctxt->instate = XML_PARSER_CONTENT;
8569 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008570 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008571 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008572 return(NULL);
8573 }
8574
8575 /*
8576 * Check that xml:lang conforms to the specification
8577 * No more registered as an error, just generate a warning now
8578 * since this was deprecated in XML second edition
8579 */
8580 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8581 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008582 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8583 "Malformed value for xml:lang : %s\n",
8584 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008585 }
8586 }
8587
8588 /*
8589 * Check that xml:space conforms to the specification
8590 */
8591 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8592 if (xmlStrEqual(val, BAD_CAST "default"))
8593 *(ctxt->space) = 0;
8594 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8595 *(ctxt->space) = 1;
8596 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008597 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008598"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008599 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008600 }
8601 }
8602
8603 *value = val;
8604 return(name);
8605}
8606
8607/**
8608 * xmlParseStartTag:
8609 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008610 *
Owen Taylor3473f882001-02-23 17:55:21 +00008611 * parse a start of tag either for rule element or
8612 * EmptyElement. In both case we don't parse the tag closing chars.
8613 *
8614 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8615 *
8616 * [ WFC: Unique Att Spec ]
8617 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008618 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008619 *
8620 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8621 *
8622 * [ WFC: Unique Att Spec ]
8623 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008624 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008625 *
8626 * With namespace:
8627 *
8628 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8629 *
8630 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8631 *
8632 * Returns the element name parsed
8633 */
8634
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008635const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008636xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008637 const xmlChar *name;
8638 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008639 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008640 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008641 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008642 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008643 int i;
8644
8645 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008646 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008647
8648 name = xmlParseName(ctxt);
8649 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008650 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008651 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008652 return(NULL);
8653 }
8654
8655 /*
8656 * Now parse the attributes, it ends up with the ending
8657 *
8658 * (S Attribute)* S?
8659 */
8660 SKIP_BLANKS;
8661 GROW;
8662
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008663 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008664 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008665 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008666 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008667 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008668
8669 attname = xmlParseAttribute(ctxt, &attvalue);
8670 if ((attname != NULL) && (attvalue != NULL)) {
8671 /*
8672 * [ WFC: Unique Att Spec ]
8673 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008674 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008675 */
8676 for (i = 0; i < nbatts;i += 2) {
8677 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008678 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008679 xmlFree(attvalue);
8680 goto failed;
8681 }
8682 }
Owen Taylor3473f882001-02-23 17:55:21 +00008683 /*
8684 * Add the pair to atts
8685 */
8686 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008687 maxatts = 22; /* allow for 10 attrs by default */
8688 atts = (const xmlChar **)
8689 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008690 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008691 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008692 if (attvalue != NULL)
8693 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008694 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008695 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008696 ctxt->atts = atts;
8697 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008698 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008699 const xmlChar **n;
8700
Owen Taylor3473f882001-02-23 17:55:21 +00008701 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008702 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008703 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008704 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008705 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008706 if (attvalue != NULL)
8707 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008708 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008709 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008710 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008711 ctxt->atts = atts;
8712 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008713 }
8714 atts[nbatts++] = attname;
8715 atts[nbatts++] = attvalue;
8716 atts[nbatts] = NULL;
8717 atts[nbatts + 1] = NULL;
8718 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008719 if (attvalue != NULL)
8720 xmlFree(attvalue);
8721 }
8722
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008723failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008724
Daniel Veillard3772de32002-12-17 10:31:45 +00008725 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008726 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8727 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008728 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008729 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8730 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008731 }
8732 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008733 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8734 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008735 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8736 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008737 break;
8738 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008739 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008740 GROW;
8741 }
8742
8743 /*
8744 * SAX: Start of Element !
8745 */
8746 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008747 (!ctxt->disableSAX)) {
8748 if (nbatts > 0)
8749 ctxt->sax->startElement(ctxt->userData, name, atts);
8750 else
8751 ctxt->sax->startElement(ctxt->userData, name, NULL);
8752 }
Owen Taylor3473f882001-02-23 17:55:21 +00008753
8754 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008755 /* Free only the content strings */
8756 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008757 if (atts[i] != NULL)
8758 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008759 }
8760 return(name);
8761}
8762
8763/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008764 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008765 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008766 * @line: line of the start tag
8767 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008768 *
8769 * parse an end of tag
8770 *
8771 * [42] ETag ::= '</' Name S? '>'
8772 *
8773 * With namespace
8774 *
8775 * [NS 9] ETag ::= '</' QName S? '>'
8776 */
8777
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008778static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008779xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008780 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008781
8782 GROW;
8783 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008784 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008785 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008786 return;
8787 }
8788 SKIP(2);
8789
Daniel Veillard46de64e2002-05-29 08:21:33 +00008790 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008791
8792 /*
8793 * We should definitely be at the ending "S? '>'" part
8794 */
8795 GROW;
8796 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008797 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008798 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008799 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008800 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008801
8802 /*
8803 * [ WFC: Element Type Match ]
8804 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008805 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008806 *
8807 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008808 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008809 if (name == NULL) name = BAD_CAST "unparseable";
8810 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008811 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008812 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008813 }
8814
8815 /*
8816 * SAX: End of Tag
8817 */
8818 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8819 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008820 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008821
Daniel Veillarde57ec792003-09-10 10:50:59 +00008822 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008823 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008824 return;
8825}
8826
8827/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008828 * xmlParseEndTag:
8829 * @ctxt: an XML parser context
8830 *
8831 * parse an end of tag
8832 *
8833 * [42] ETag ::= '</' Name S? '>'
8834 *
8835 * With namespace
8836 *
8837 * [NS 9] ETag ::= '</' QName S? '>'
8838 */
8839
8840void
8841xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008842 xmlParseEndTag1(ctxt, 0);
8843}
Daniel Veillard81273902003-09-30 00:43:48 +00008844#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008845
8846/************************************************************************
8847 * *
8848 * SAX 2 specific operations *
8849 * *
8850 ************************************************************************/
8851
Daniel Veillard0fb18932003-09-07 09:14:37 +00008852/*
8853 * xmlGetNamespace:
8854 * @ctxt: an XML parser context
8855 * @prefix: the prefix to lookup
8856 *
8857 * Lookup the namespace name for the @prefix (which ca be NULL)
Jan Pokornýbb654fe2016-04-13 16:56:07 +02008858 * The prefix must come from the @ctxt->dict dictionary
Daniel Veillard0fb18932003-09-07 09:14:37 +00008859 *
8860 * Returns the namespace name or NULL if not bound
8861 */
8862static const xmlChar *
8863xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8864 int i;
8865
Daniel Veillarde57ec792003-09-10 10:50:59 +00008866 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008867 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008868 if (ctxt->nsTab[i] == prefix) {
8869 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8870 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008871 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008872 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008873 return(NULL);
8874}
8875
8876/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008877 * xmlParseQName:
8878 * @ctxt: an XML parser context
8879 * @prefix: pointer to store the prefix part
8880 *
8881 * parse an XML Namespace QName
8882 *
8883 * [6] QName ::= (Prefix ':')? LocalPart
8884 * [7] Prefix ::= NCName
8885 * [8] LocalPart ::= NCName
8886 *
8887 * Returns the Name parsed or NULL
8888 */
8889
8890static const xmlChar *
8891xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8892 const xmlChar *l, *p;
8893
8894 GROW;
8895
8896 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008897 if (l == NULL) {
8898 if (CUR == ':') {
8899 l = xmlParseName(ctxt);
8900 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008901 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008902 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008903 *prefix = NULL;
8904 return(l);
8905 }
8906 }
8907 return(NULL);
8908 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008909 if (CUR == ':') {
8910 NEXT;
8911 p = l;
8912 l = xmlParseNCName(ctxt);
8913 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008914 xmlChar *tmp;
8915
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008916 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8917 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008918 l = xmlParseNmtoken(ctxt);
8919 if (l == NULL)
8920 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8921 else {
8922 tmp = xmlBuildQName(l, p, NULL, 0);
8923 xmlFree((char *)l);
8924 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008925 p = xmlDictLookup(ctxt->dict, tmp, -1);
8926 if (tmp != NULL) xmlFree(tmp);
8927 *prefix = NULL;
8928 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008929 }
8930 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008931 xmlChar *tmp;
8932
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008933 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8934 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008935 NEXT;
8936 tmp = (xmlChar *) xmlParseName(ctxt);
8937 if (tmp != NULL) {
8938 tmp = xmlBuildQName(tmp, l, NULL, 0);
8939 l = xmlDictLookup(ctxt->dict, tmp, -1);
8940 if (tmp != NULL) xmlFree(tmp);
8941 *prefix = p;
8942 return(l);
8943 }
8944 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8945 l = xmlDictLookup(ctxt->dict, tmp, -1);
8946 if (tmp != NULL) xmlFree(tmp);
8947 *prefix = p;
8948 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008949 }
8950 *prefix = p;
8951 } else
8952 *prefix = NULL;
8953 return(l);
8954}
8955
8956/**
8957 * xmlParseQNameAndCompare:
8958 * @ctxt: an XML parser context
8959 * @name: the localname
8960 * @prefix: the prefix, if any.
8961 *
8962 * parse an XML name and compares for match
8963 * (specialized for endtag parsing)
8964 *
8965 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8966 * and the name for mismatch
8967 */
8968
8969static const xmlChar *
8970xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8971 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008972 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008973 const xmlChar *in;
8974 const xmlChar *ret;
8975 const xmlChar *prefix2;
8976
8977 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8978
8979 GROW;
8980 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008981
Daniel Veillard0fb18932003-09-07 09:14:37 +00008982 cmp = prefix;
8983 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008984 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008985 ++cmp;
8986 }
8987 if ((*cmp == 0) && (*in == ':')) {
8988 in++;
8989 cmp = name;
8990 while (*in != 0 && *in == *cmp) {
8991 ++in;
8992 ++cmp;
8993 }
William M. Brack76e95df2003-10-18 16:20:14 +00008994 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008995 /* success */
8996 ctxt->input->cur = in;
8997 return((const xmlChar*) 1);
8998 }
8999 }
9000 /*
9001 * all strings coms from the dictionary, equality can be done directly
9002 */
9003 ret = xmlParseQName (ctxt, &prefix2);
9004 if ((ret == name) && (prefix == prefix2))
9005 return((const xmlChar*) 1);
9006 return ret;
9007}
9008
9009/**
9010 * xmlParseAttValueInternal:
9011 * @ctxt: an XML parser context
9012 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009013 * @alloc: whether the attribute was reallocated as a new string
9014 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00009015 *
9016 * parse a value for an attribute.
9017 * NOTE: if no normalization is needed, the routine will return pointers
9018 * directly from the data buffer.
9019 *
9020 * 3.3.3 Attribute-Value Normalization:
9021 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009022 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009023 * - a character reference is processed by appending the referenced
9024 * character to the attribute value
9025 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009026 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00009027 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028 * appending #x20 to the normalized value, except that only a single
9029 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009030 * parsed entity or the literal entity value of an internal parsed entity
9031 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00009032 * If the declared value is not CDATA, then the XML processor must further
9033 * process the normalized attribute value by discarding any leading and
9034 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009035 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009036 * All attributes for which no declaration has been read should be treated
9037 * by a non-validating parser as if declared CDATA.
9038 *
9039 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040 * caller if it was copied, this can be detected by val[*len] == 0.
9041 */
9042
9043static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009044xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9045 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009046{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009047 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009048 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009049 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08009050 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009051
9052 GROW;
9053 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08009054 line = ctxt->input->line;
9055 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009056 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009057 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009058 return (NULL);
9059 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009060 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009061
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009062 /*
9063 * try to handle in this routine the most common case where no
9064 * allocation of a new string is required and where content is
9065 * pure ASCII.
9066 */
9067 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009068 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009069 end = ctxt->input->end;
9070 start = in;
9071 if (in >= end) {
9072 const xmlChar *oldbase = ctxt->input->base;
9073 GROW;
9074 if (oldbase != ctxt->input->base) {
9075 long delta = ctxt->input->base - oldbase;
9076 start = start + delta;
9077 in = in + delta;
9078 }
9079 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009080 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009081 if (normalize) {
9082 /*
9083 * Skip any leading spaces
9084 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009085 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009086 ((*in == 0x20) || (*in == 0x9) ||
9087 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009088 if (*in == 0xA) {
9089 line++; col = 1;
9090 } else {
9091 col++;
9092 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009093 in++;
9094 start = in;
9095 if (in >= end) {
9096 const xmlChar *oldbase = ctxt->input->base;
9097 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009098 if (ctxt->instate == XML_PARSER_EOF)
9099 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009100 if (oldbase != ctxt->input->base) {
9101 long delta = ctxt->input->base - oldbase;
9102 start = start + delta;
9103 in = in + delta;
9104 }
9105 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009106 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9107 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9108 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009109 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009110 return(NULL);
9111 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009112 }
9113 }
9114 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9115 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009116 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009117 if ((*in++ == 0x20) && (*in == 0x20)) break;
9118 if (in >= end) {
9119 const xmlChar *oldbase = ctxt->input->base;
9120 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009121 if (ctxt->instate == XML_PARSER_EOF)
9122 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009123 if (oldbase != ctxt->input->base) {
9124 long delta = ctxt->input->base - oldbase;
9125 start = start + delta;
9126 in = in + delta;
9127 }
9128 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009129 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9130 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9131 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009132 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009133 return(NULL);
9134 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009135 }
9136 }
9137 last = in;
9138 /*
9139 * skip the trailing blanks
9140 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00009141 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009142 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009143 ((*in == 0x20) || (*in == 0x9) ||
9144 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009145 if (*in == 0xA) {
9146 line++, col = 1;
9147 } else {
9148 col++;
9149 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009150 in++;
9151 if (in >= end) {
9152 const xmlChar *oldbase = ctxt->input->base;
9153 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009154 if (ctxt->instate == XML_PARSER_EOF)
9155 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009156 if (oldbase != ctxt->input->base) {
9157 long delta = ctxt->input->base - oldbase;
9158 start = start + delta;
9159 in = in + delta;
9160 last = last + delta;
9161 }
9162 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009163 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9164 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9165 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009166 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009167 return(NULL);
9168 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009169 }
9170 }
Daniel Veillarde17db992012-07-19 11:25:16 +08009171 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9172 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9173 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009174 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009175 return(NULL);
9176 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009177 if (*in != limit) goto need_complex;
9178 } else {
9179 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9180 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9181 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009182 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009183 if (in >= end) {
9184 const xmlChar *oldbase = ctxt->input->base;
9185 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009186 if (ctxt->instate == XML_PARSER_EOF)
9187 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009188 if (oldbase != ctxt->input->base) {
9189 long delta = ctxt->input->base - oldbase;
9190 start = start + delta;
9191 in = in + delta;
9192 }
9193 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009194 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9195 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9196 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009197 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009198 return(NULL);
9199 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009200 }
9201 }
9202 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009203 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9204 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9205 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009206 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009207 return(NULL);
9208 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009209 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009210 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009211 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009212 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009213 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009214 *len = last - start;
9215 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009216 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009217 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009218 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009219 }
9220 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009221 ctxt->input->line = line;
9222 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009223 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009224 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009225need_complex:
9226 if (alloc) *alloc = 1;
9227 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009228}
9229
9230/**
9231 * xmlParseAttribute2:
9232 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009233 * @pref: the element prefix
9234 * @elem: the element name
9235 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009236 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009237 * @len: an int * to save the length of the attribute
9238 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009239 *
9240 * parse an attribute in the new SAX2 framework.
9241 *
9242 * Returns the attribute name, and the value in *value, .
9243 */
9244
9245static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009246xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009247 const xmlChar * pref, const xmlChar * elem,
9248 const xmlChar ** prefix, xmlChar ** value,
9249 int *len, int *alloc)
9250{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009251 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009252 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009253 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009254
9255 *value = NULL;
9256 GROW;
9257 name = xmlParseQName(ctxt, prefix);
9258 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009259 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9260 "error parsing attribute name\n");
9261 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009262 }
9263
9264 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009265 * get the type if needed
9266 */
9267 if (ctxt->attsSpecial != NULL) {
9268 int type;
9269
9270 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009271 pref, elem, *prefix, name);
9272 if (type != 0)
9273 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009274 }
9275
9276 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009277 * read the value
9278 */
9279 SKIP_BLANKS;
9280 if (RAW == '=') {
9281 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009282 SKIP_BLANKS;
9283 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9284 if (normalize) {
9285 /*
9286 * Sometimes a second normalisation pass for spaces is needed
9287 * but that only happens if charrefs or entities refernces
9288 * have been used in the attribute value, i.e. the attribute
9289 * value have been extracted in an allocated string already.
9290 */
9291 if (*alloc) {
9292 const xmlChar *val2;
9293
9294 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009295 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009296 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009297 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009298 }
9299 }
9300 }
9301 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009302 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009303 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9304 "Specification mandate value for attribute %s\n",
9305 name);
9306 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009307 }
9308
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009309 if (*prefix == ctxt->str_xml) {
9310 /*
9311 * Check that xml:lang conforms to the specification
9312 * No more registered as an error, just generate a warning now
9313 * since this was deprecated in XML second edition
9314 */
9315 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9316 internal_val = xmlStrndup(val, *len);
9317 if (!xmlCheckLanguageID(internal_val)) {
9318 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9319 "Malformed value for xml:lang : %s\n",
9320 internal_val, NULL);
9321 }
9322 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009323
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009324 /*
9325 * Check that xml:space conforms to the specification
9326 */
9327 if (xmlStrEqual(name, BAD_CAST "space")) {
9328 internal_val = xmlStrndup(val, *len);
9329 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9330 *(ctxt->space) = 0;
9331 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9332 *(ctxt->space) = 1;
9333 else {
9334 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9335 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9336 internal_val, NULL);
9337 }
9338 }
9339 if (internal_val) {
9340 xmlFree(internal_val);
9341 }
9342 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009343
9344 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009345 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009346}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009347/**
9348 * xmlParseStartTag2:
9349 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009350 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009351 * parse a start of tag either for rule element or
9352 * EmptyElement. In both case we don't parse the tag closing chars.
9353 * This routine is called when running SAX2 parsing
9354 *
9355 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9356 *
9357 * [ WFC: Unique Att Spec ]
9358 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009359 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009360 *
9361 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9362 *
9363 * [ WFC: Unique Att Spec ]
9364 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009365 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009366 *
9367 * With namespace:
9368 *
9369 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9370 *
9371 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9372 *
9373 * Returns the element name parsed
9374 */
9375
9376static const xmlChar *
9377xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009378 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009379 const xmlChar *localname;
9380 const xmlChar *prefix;
9381 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009382 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009383 const xmlChar *nsname;
9384 xmlChar *attvalue;
9385 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009386 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009387 int nratts, nbatts, nbdef;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009388 int i, j, nbNs, attval;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009389 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009390 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009391
9392 if (RAW != '<') return(NULL);
9393 NEXT1;
9394
9395 /*
9396 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9397 * point since the attribute values may be stored as pointers to
9398 * the buffer and calling SHRINK would destroy them !
9399 * The Shrinking is only possible once the full set of attribute
9400 * callbacks have been done.
9401 */
9402 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009403 cur = ctxt->input->cur - ctxt->input->base;
9404 nbatts = 0;
9405 nratts = 0;
9406 nbdef = 0;
9407 nbNs = 0;
9408 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009409 /* Forget any namespaces added during an earlier parse of this element. */
9410 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009411
9412 localname = xmlParseQName(ctxt, &prefix);
9413 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009414 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9415 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009416 return(NULL);
9417 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009418 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009419
9420 /*
9421 * Now parse the attributes, it ends up with the ending
9422 *
9423 * (S Attribute)* S?
9424 */
9425 SKIP_BLANKS;
9426 GROW;
9427
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009428 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009429 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009430 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009431 const xmlChar *q = CUR_PTR;
9432 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009433 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009434
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009435 attname = xmlParseAttribute2(ctxt, prefix, localname,
9436 &aprefix, &attvalue, &len, &alloc);
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009437 if ((attname == NULL) || (attvalue == NULL))
9438 goto next_attr;
9439 if (len < 0) len = xmlStrlen(attvalue);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009440
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009441 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9442 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9443 xmlURIPtr uri;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009444
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009445 if (URL == NULL) {
9446 xmlErrMemory(ctxt, "dictionary allocation failure");
9447 if ((attvalue != NULL) && (alloc != 0))
9448 xmlFree(attvalue);
9449 return(NULL);
9450 }
9451 if (*URL != 0) {
9452 uri = xmlParseURI((const char *) URL);
9453 if (uri == NULL) {
9454 xmlNsErr(ctxt, XML_WAR_NS_URI,
9455 "xmlns: '%s' is not a valid URI\n",
9456 URL, NULL, NULL);
9457 } else {
9458 if (uri->scheme == NULL) {
9459 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9460 "xmlns: URI %s is not absolute\n",
9461 URL, NULL, NULL);
9462 }
9463 xmlFreeURI(uri);
9464 }
Daniel Veillard37334572008-07-31 08:20:02 +00009465 if (URL == ctxt->str_xml_ns) {
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009466 if (attname != ctxt->str_xml) {
9467 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9468 "xml namespace URI cannot be the default namespace\n",
9469 NULL, NULL, NULL);
9470 }
9471 goto next_attr;
9472 }
9473 if ((len == 29) &&
9474 (xmlStrEqual(URL,
9475 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9476 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9477 "reuse of the xmlns namespace name is forbidden\n",
9478 NULL, NULL, NULL);
9479 goto next_attr;
9480 }
9481 }
9482 /*
9483 * check that it's not a defined namespace
9484 */
9485 for (j = 1;j <= nbNs;j++)
9486 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9487 break;
9488 if (j <= nbNs)
9489 xmlErrAttributeDup(ctxt, NULL, attname);
9490 else
9491 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009492
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009493 } else if (aprefix == ctxt->str_xmlns) {
9494 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9495 xmlURIPtr uri;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009496
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009497 if (attname == ctxt->str_xml) {
9498 if (URL != ctxt->str_xml_ns) {
9499 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500 "xml namespace prefix mapped to wrong URI\n",
9501 NULL, NULL, NULL);
9502 }
9503 /*
9504 * Do not keep a namespace definition node
9505 */
9506 goto next_attr;
9507 }
9508 if (URL == ctxt->str_xml_ns) {
9509 if (attname != ctxt->str_xml) {
9510 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9511 "xml namespace URI mapped to wrong prefix\n",
9512 NULL, NULL, NULL);
9513 }
9514 goto next_attr;
9515 }
9516 if (attname == ctxt->str_xmlns) {
9517 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9518 "redefinition of the xmlns prefix is forbidden\n",
9519 NULL, NULL, NULL);
9520 goto next_attr;
9521 }
9522 if ((len == 29) &&
9523 (xmlStrEqual(URL,
9524 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9525 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9526 "reuse of the xmlns namespace name is forbidden\n",
9527 NULL, NULL, NULL);
9528 goto next_attr;
9529 }
9530 if ((URL == NULL) || (URL[0] == 0)) {
9531 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9532 "xmlns:%s: Empty XML namespace is not allowed\n",
9533 attname, NULL, NULL);
9534 goto next_attr;
9535 } else {
9536 uri = xmlParseURI((const char *) URL);
9537 if (uri == NULL) {
9538 xmlNsErr(ctxt, XML_WAR_NS_URI,
9539 "xmlns:%s: '%s' is not a valid URI\n",
9540 attname, URL, NULL);
9541 } else {
9542 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9543 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9544 "xmlns:%s: URI %s is not absolute\n",
9545 attname, URL, NULL);
9546 }
9547 xmlFreeURI(uri);
9548 }
9549 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009550
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009551 /*
9552 * check that it's not a defined namespace
9553 */
9554 for (j = 1;j <= nbNs;j++)
9555 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9556 break;
9557 if (j <= nbNs)
9558 xmlErrAttributeDup(ctxt, aprefix, attname);
9559 else
9560 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9561
9562 } else {
9563 /*
9564 * Add the pair to atts
9565 */
9566 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9567 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9568 goto next_attr;
9569 }
9570 maxatts = ctxt->maxatts;
9571 atts = ctxt->atts;
9572 }
9573 ctxt->attallocs[nratts++] = alloc;
9574 atts[nbatts++] = attname;
9575 atts[nbatts++] = aprefix;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009576 /*
9577 * The namespace URI field is used temporarily to point at the
9578 * base of the current input buffer for non-alloced attributes.
9579 * When the input buffer is reallocated, all the pointers become
9580 * invalid, but they can be reconstructed later.
9581 */
9582 if (alloc)
9583 atts[nbatts++] = NULL;
9584 else
9585 atts[nbatts++] = ctxt->input->base;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009586 atts[nbatts++] = attvalue;
9587 attvalue += len;
9588 atts[nbatts++] = attvalue;
9589 /*
9590 * tag if some deallocation is needed
9591 */
9592 if (alloc != 0) attval = 1;
9593 attvalue = NULL; /* moved into atts */
9594 }
9595
9596next_attr:
9597 if ((attvalue != NULL) && (alloc != 0)) {
9598 xmlFree(attvalue);
9599 attvalue = NULL;
9600 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009601
9602 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009603 if (ctxt->instate == XML_PARSER_EOF)
9604 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009605 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9606 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009607 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009608 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9609 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009610 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009611 }
9612 SKIP_BLANKS;
9613 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9614 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009615 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009616 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009617 break;
9618 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009619 GROW;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009620 }
9621
9622 /* Reconstruct attribute value pointers. */
9623 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9624 if (atts[i+2] != NULL) {
9625 /*
9626 * Arithmetic on dangling pointers is technically undefined
9627 * behavior, but well...
9628 */
9629 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9630 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9631 atts[i+3] += offset; /* value */
9632 atts[i+4] += offset; /* valuend */
9633 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009634 }
9635
Daniel Veillard0fb18932003-09-07 09:14:37 +00009636 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009637 * The attributes defaulting
9638 */
9639 if (ctxt->attsDefault != NULL) {
9640 xmlDefAttrsPtr defaults;
9641
9642 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9643 if (defaults != NULL) {
9644 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009645 attname = defaults->values[5 * i];
9646 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009647
9648 /*
9649 * special work for namespaces defaulted defs
9650 */
9651 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9652 /*
9653 * check that it's not a defined namespace
9654 */
9655 for (j = 1;j <= nbNs;j++)
9656 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9657 break;
9658 if (j <= nbNs) continue;
9659
9660 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009661 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009662 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009663 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009664 nbNs++;
9665 }
9666 } else if (aprefix == ctxt->str_xmlns) {
9667 /*
9668 * check that it's not a defined namespace
9669 */
9670 for (j = 1;j <= nbNs;j++)
9671 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9672 break;
9673 if (j <= nbNs) continue;
9674
9675 nsname = xmlGetNamespace(ctxt, attname);
9676 if (nsname != defaults->values[2]) {
9677 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009678 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009679 nbNs++;
9680 }
9681 } else {
9682 /*
9683 * check that it's not a defined attribute
9684 */
9685 for (j = 0;j < nbatts;j+=5) {
9686 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9687 break;
9688 }
9689 if (j < nbatts) continue;
9690
9691 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9692 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009693 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009694 }
9695 maxatts = ctxt->maxatts;
9696 atts = ctxt->atts;
9697 }
9698 atts[nbatts++] = attname;
9699 atts[nbatts++] = aprefix;
9700 if (aprefix == NULL)
9701 atts[nbatts++] = NULL;
9702 else
9703 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009704 atts[nbatts++] = defaults->values[5 * i + 2];
9705 atts[nbatts++] = defaults->values[5 * i + 3];
9706 if ((ctxt->standalone == 1) &&
9707 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009708 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009709 "standalone: attribute %s on %s defaulted from external subset\n",
9710 attname, localname);
9711 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009712 nbdef++;
9713 }
9714 }
9715 }
9716 }
9717
Daniel Veillarde70c8772003-11-25 07:21:18 +00009718 /*
9719 * The attributes checkings
9720 */
9721 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009722 /*
9723 * The default namespace does not apply to attribute names.
9724 */
9725 if (atts[i + 1] != NULL) {
9726 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9727 if (nsname == NULL) {
9728 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9729 "Namespace prefix %s for %s on %s is not defined\n",
9730 atts[i + 1], atts[i], localname);
9731 }
9732 atts[i + 2] = nsname;
9733 } else
9734 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009735 /*
9736 * [ WFC: Unique Att Spec ]
9737 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009738 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009739 * As extended by the Namespace in XML REC.
9740 */
9741 for (j = 0; j < i;j += 5) {
9742 if (atts[i] == atts[j]) {
9743 if (atts[i+1] == atts[j+1]) {
9744 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9745 break;
9746 }
9747 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9748 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9749 "Namespaced Attribute %s in '%s' redefined\n",
9750 atts[i], nsname, NULL);
9751 break;
9752 }
9753 }
9754 }
9755 }
9756
Daniel Veillarde57ec792003-09-10 10:50:59 +00009757 nsname = xmlGetNamespace(ctxt, prefix);
9758 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009759 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9760 "Namespace prefix %s on %s is not defined\n",
9761 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009762 }
9763 *pref = prefix;
9764 *URI = nsname;
9765
9766 /*
9767 * SAX: Start of Element !
9768 */
9769 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9770 (!ctxt->disableSAX)) {
9771 if (nbNs > 0)
9772 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9773 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9774 nbatts / 5, nbdef, atts);
9775 else
9776 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9777 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9778 }
9779
9780 /*
9781 * Free up attribute allocated strings if needed
9782 */
9783 if (attval != 0) {
9784 for (i = 3,j = 0; j < nratts;i += 5,j++)
9785 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9786 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009787 }
9788
9789 return(localname);
9790}
9791
9792/**
9793 * xmlParseEndTag2:
9794 * @ctxt: an XML parser context
9795 * @line: line of the start tag
9796 * @nsNr: number of namespaces on the start tag
9797 *
9798 * parse an end of tag
9799 *
9800 * [42] ETag ::= '</' Name S? '>'
9801 *
9802 * With namespace
9803 *
9804 * [NS 9] ETag ::= '</' QName S? '>'
9805 */
9806
9807static void
9808xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009809 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009810 const xmlChar *name;
David Kilzerdb07dd62016-02-12 09:58:29 -08009811 size_t curLength;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009812
9813 GROW;
9814 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009815 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009816 return;
9817 }
9818 SKIP(2);
9819
David Kilzerdb07dd62016-02-12 09:58:29 -08009820 curLength = ctxt->input->end - ctxt->input->cur;
9821 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9822 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9823 if ((curLength >= (size_t)(tlen + 1)) &&
9824 (ctxt->input->cur[tlen] == '>')) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009825 ctxt->input->cur += tlen + 1;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009826 ctxt->input->col += tlen + 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009827 goto done;
9828 }
9829 ctxt->input->cur += tlen;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009830 ctxt->input->col += tlen;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009831 name = (xmlChar*)1;
9832 } else {
9833 if (prefix == NULL)
9834 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9835 else
9836 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9837 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009838
9839 /*
9840 * We should definitely be at the ending "S? '>'" part
9841 */
9842 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009843 if (ctxt->instate == XML_PARSER_EOF)
9844 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009845 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009846 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009847 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009848 } else
9849 NEXT1;
9850
9851 /*
9852 * [ WFC: Element Type Match ]
9853 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009854 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009855 *
9856 */
9857 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009858 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009859 if ((line == 0) && (ctxt->node != NULL))
9860 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009861 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009862 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009863 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009864 }
9865
9866 /*
9867 * SAX: End of Tag
9868 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009869done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009870 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9871 (!ctxt->disableSAX))
9872 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9873
Daniel Veillard0fb18932003-09-07 09:14:37 +00009874 spacePop(ctxt);
9875 if (nsNr != 0)
9876 nsPop(ctxt, nsNr);
9877 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009878}
9879
9880/**
Owen Taylor3473f882001-02-23 17:55:21 +00009881 * xmlParseCDSect:
9882 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009883 *
Owen Taylor3473f882001-02-23 17:55:21 +00009884 * Parse escaped pure raw content.
9885 *
9886 * [18] CDSect ::= CDStart CData CDEnd
9887 *
9888 * [19] CDStart ::= '<![CDATA['
9889 *
9890 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9891 *
9892 * [21] CDEnd ::= ']]>'
9893 */
9894void
9895xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9896 xmlChar *buf = NULL;
9897 int len = 0;
9898 int size = XML_PARSER_BUFFER_SIZE;
9899 int r, rl;
9900 int s, sl;
9901 int cur, l;
9902 int count = 0;
9903
Daniel Veillard8f597c32003-10-06 08:19:27 +00009904 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009905 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009906 SKIP(9);
9907 } else
9908 return;
9909
9910 ctxt->instate = XML_PARSER_CDATA_SECTION;
9911 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009912 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009913 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009914 ctxt->instate = XML_PARSER_CONTENT;
9915 return;
9916 }
9917 NEXTL(rl);
9918 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009919 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009920 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009921 ctxt->instate = XML_PARSER_CONTENT;
9922 return;
9923 }
9924 NEXTL(sl);
9925 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009926 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009927 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009928 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009929 return;
9930 }
William M. Brack871611b2003-10-18 04:53:14 +00009931 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009932 ((r != ']') || (s != ']') || (cur != '>'))) {
9933 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009934 xmlChar *tmp;
9935
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009936 if ((size > XML_MAX_TEXT_LENGTH) &&
9937 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9938 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939 "CData section too big found", NULL);
9940 xmlFree (buf);
9941 return;
9942 }
9943 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009944 if (tmp == NULL) {
9945 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009946 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009947 return;
9948 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009949 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009950 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009951 }
9952 COPY_BUF(rl,buf,len,r);
9953 r = s;
9954 rl = sl;
9955 s = cur;
9956 sl = l;
9957 count++;
9958 if (count > 50) {
9959 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009960 if (ctxt->instate == XML_PARSER_EOF) {
9961 xmlFree(buf);
9962 return;
9963 }
Owen Taylor3473f882001-02-23 17:55:21 +00009964 count = 0;
9965 }
9966 NEXTL(l);
9967 cur = CUR_CHAR(l);
9968 }
9969 buf[len] = 0;
9970 ctxt->instate = XML_PARSER_CONTENT;
9971 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009972 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009973 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009974 xmlFree(buf);
9975 return;
9976 }
9977 NEXTL(l);
9978
9979 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009980 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009981 */
9982 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9983 if (ctxt->sax->cdataBlock != NULL)
9984 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009985 else if (ctxt->sax->characters != NULL)
9986 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009987 }
9988 xmlFree(buf);
9989}
9990
9991/**
9992 * xmlParseContent:
9993 * @ctxt: an XML parser context
9994 *
9995 * Parse a content:
9996 *
9997 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9998 */
9999
10000void
10001xmlParseContent(xmlParserCtxtPtr ctxt) {
10002 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +000010003 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +000010004 ((RAW != '<') || (NXT(1) != '/')) &&
10005 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010006 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +000010007 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +000010008 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010009
10010 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010011 * First case : a Processing Instruction.
10012 */
Daniel Veillardfdc91562002-07-01 21:52:03 +000010013 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010014 xmlParsePI(ctxt);
10015 }
10016
10017 /*
10018 * Second case : a CDSection
10019 */
Daniel Veillard8f597c32003-10-06 08:19:27 +000010020 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010021 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010022 xmlParseCDSect(ctxt);
10023 }
10024
10025 /*
10026 * Third case : a comment
10027 */
Daniel Veillard21a0f912001-02-25 19:54:14 +000010028 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010029 (NXT(2) == '-') && (NXT(3) == '-')) {
10030 xmlParseComment(ctxt);
10031 ctxt->instate = XML_PARSER_CONTENT;
10032 }
10033
10034 /*
10035 * Fourth case : a sub-element.
10036 */
Daniel Veillard21a0f912001-02-25 19:54:14 +000010037 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +000010038 xmlParseElement(ctxt);
10039 }
10040
10041 /*
10042 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010043 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +000010044 */
10045
Daniel Veillard21a0f912001-02-25 19:54:14 +000010046 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +000010047 xmlParseReference(ctxt);
10048 }
10049
10050 /*
10051 * Last case, text. Note that References are handled directly.
10052 */
10053 else {
10054 xmlParseCharData(ctxt, 0);
10055 }
10056
10057 GROW;
10058 /*
10059 * Pop-up of finished entities.
10060 */
Daniel Veillard561b7f82002-03-20 21:55:57 +000010061 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +000010062 xmlPopInput(ctxt);
10063 SHRINK;
10064
Daniel Veillardfdc91562002-07-01 21:52:03 +000010065 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010066 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10067 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080010068 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010069 break;
10070 }
10071 }
10072}
10073
10074/**
10075 * xmlParseElement:
10076 * @ctxt: an XML parser context
10077 *
10078 * parse an XML element, this is highly recursive
10079 *
10080 * [39] element ::= EmptyElemTag | STag content ETag
10081 *
10082 * [ WFC: Element Type Match ]
10083 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010084 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +000010085 *
Owen Taylor3473f882001-02-23 17:55:21 +000010086 */
10087
10088void
10089xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +000010090 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010091 const xmlChar *prefix = NULL;
10092 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010093 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +080010094 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010095 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +000010096 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +000010097
Daniel Veillard8915c152008-08-26 13:05:34 +000010098 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10099 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10100 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10101 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10102 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +080010103 xmlHaltParser(ctxt);
Daniel Veillard4a9fe382006-09-19 12:44:35 +000010104 return;
10105 }
10106
Owen Taylor3473f882001-02-23 17:55:21 +000010107 /* Capture start position */
10108 if (ctxt->record_info) {
10109 node_info.begin_pos = ctxt->input->consumed +
10110 (CUR_PTR - ctxt->input->base);
10111 node_info.begin_line = ctxt->input->line;
10112 }
10113
10114 if (ctxt->spaceNr == 0)
10115 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010116 else if (*ctxt->space == -2)
10117 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +000010118 else
10119 spacePush(ctxt, *ctxt->space);
10120
Daniel Veillard6c5b2d32003-03-27 14:55:52 +000010121 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +000010122#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010123 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010124#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010125 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010126#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010127 else
10128 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010129#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080010130 if (ctxt->instate == XML_PARSER_EOF)
10131 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010132 if (name == NULL) {
10133 spacePop(ctxt);
10134 return;
10135 }
10136 namePush(ctxt, name);
10137 ret = ctxt->node;
10138
Daniel Veillard4432df22003-09-28 18:58:27 +000010139#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010140 /*
10141 * [ VC: Root Element Type ]
10142 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010143 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +000010144 */
10145 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10146 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10147 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010148#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010149
10150 /*
10151 * Check for an Empty Element.
10152 */
10153 if ((RAW == '/') && (NXT(1) == '>')) {
10154 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010155 if (ctxt->sax2) {
10156 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10157 (!ctxt->disableSAX))
10158 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010159#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010160 } else {
10161 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10162 (!ctxt->disableSAX))
10163 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010164#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010165 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010166 namePop(ctxt);
10167 spacePop(ctxt);
10168 if (nsNr != ctxt->nsNr)
10169 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010170 if ( ret != NULL && ctxt->record_info ) {
10171 node_info.end_pos = ctxt->input->consumed +
10172 (CUR_PTR - ctxt->input->base);
10173 node_info.end_line = ctxt->input->line;
10174 node_info.node = ret;
10175 xmlParserAddNodeInfo(ctxt, &node_info);
10176 }
10177 return;
10178 }
10179 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010180 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010181 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010182 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10183 "Couldn't find end of Start Tag %s line %d\n",
10184 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010185
10186 /*
10187 * end of parsing of this node.
10188 */
10189 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010190 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010191 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010192 if (nsNr != ctxt->nsNr)
10193 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010194
10195 /*
10196 * Capture end position and add node
10197 */
10198 if ( ret != NULL && ctxt->record_info ) {
10199 node_info.end_pos = ctxt->input->consumed +
10200 (CUR_PTR - ctxt->input->base);
10201 node_info.end_line = ctxt->input->line;
10202 node_info.node = ret;
10203 xmlParserAddNodeInfo(ctxt, &node_info);
10204 }
10205 return;
10206 }
10207
10208 /*
10209 * Parse the content of the element:
10210 */
10211 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010212 if (ctxt->instate == XML_PARSER_EOF)
10213 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010214 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010215 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010216 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010217 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010218
10219 /*
10220 * end of parsing of this node.
10221 */
10222 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010223 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010224 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010225 if (nsNr != ctxt->nsNr)
10226 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010227 return;
10228 }
10229
10230 /*
10231 * parse the end of tag: '</' should be here.
10232 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010233 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010234 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010235 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010236 }
10237#ifdef LIBXML_SAX1_ENABLED
10238 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010239 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010240#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010241
10242 /*
10243 * Capture end position and add node
10244 */
10245 if ( ret != NULL && ctxt->record_info ) {
10246 node_info.end_pos = ctxt->input->consumed +
10247 (CUR_PTR - ctxt->input->base);
10248 node_info.end_line = ctxt->input->line;
10249 node_info.node = ret;
10250 xmlParserAddNodeInfo(ctxt, &node_info);
10251 }
10252}
10253
10254/**
10255 * xmlParseVersionNum:
10256 * @ctxt: an XML parser context
10257 *
10258 * parse the XML version value.
10259 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010260 * [26] VersionNum ::= '1.' [0-9]+
10261 *
10262 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010263 *
10264 * Returns the string giving the XML version number, or NULL
10265 */
10266xmlChar *
10267xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10268 xmlChar *buf = NULL;
10269 int len = 0;
10270 int size = 10;
10271 xmlChar cur;
10272
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010273 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010274 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010275 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010276 return(NULL);
10277 }
10278 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010279 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010280 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010281 return(NULL);
10282 }
10283 buf[len++] = cur;
10284 NEXT;
10285 cur=CUR;
10286 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010287 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010288 return(NULL);
10289 }
10290 buf[len++] = cur;
10291 NEXT;
10292 cur=CUR;
10293 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010294 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010295 xmlChar *tmp;
10296
Owen Taylor3473f882001-02-23 17:55:21 +000010297 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010298 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10299 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010300 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010301 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010302 return(NULL);
10303 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010304 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010305 }
10306 buf[len++] = cur;
10307 NEXT;
10308 cur=CUR;
10309 }
10310 buf[len] = 0;
10311 return(buf);
10312}
10313
10314/**
10315 * xmlParseVersionInfo:
10316 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010317 *
Owen Taylor3473f882001-02-23 17:55:21 +000010318 * parse the XML version.
10319 *
10320 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010321 *
Owen Taylor3473f882001-02-23 17:55:21 +000010322 * [25] Eq ::= S? '=' S?
10323 *
10324 * Returns the version string, e.g. "1.0"
10325 */
10326
10327xmlChar *
10328xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10329 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010330
Daniel Veillarda07050d2003-10-19 14:46:32 +000010331 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010332 SKIP(7);
10333 SKIP_BLANKS;
10334 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010335 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010336 return(NULL);
10337 }
10338 NEXT;
10339 SKIP_BLANKS;
10340 if (RAW == '"') {
10341 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010342 version = xmlParseVersionNum(ctxt);
10343 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010344 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010345 } else
10346 NEXT;
10347 } else if (RAW == '\''){
10348 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010349 version = xmlParseVersionNum(ctxt);
10350 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010351 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010352 } else
10353 NEXT;
10354 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010355 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010356 }
10357 }
10358 return(version);
10359}
10360
10361/**
10362 * xmlParseEncName:
10363 * @ctxt: an XML parser context
10364 *
10365 * parse the XML encoding name
10366 *
10367 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10368 *
10369 * Returns the encoding name value or NULL
10370 */
10371xmlChar *
10372xmlParseEncName(xmlParserCtxtPtr ctxt) {
10373 xmlChar *buf = NULL;
10374 int len = 0;
10375 int size = 10;
10376 xmlChar cur;
10377
10378 cur = CUR;
10379 if (((cur >= 'a') && (cur <= 'z')) ||
10380 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010381 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010382 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010383 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010384 return(NULL);
10385 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010386
Owen Taylor3473f882001-02-23 17:55:21 +000010387 buf[len++] = cur;
10388 NEXT;
10389 cur = CUR;
10390 while (((cur >= 'a') && (cur <= 'z')) ||
10391 ((cur >= 'A') && (cur <= 'Z')) ||
10392 ((cur >= '0') && (cur <= '9')) ||
10393 (cur == '.') || (cur == '_') ||
10394 (cur == '-')) {
10395 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010396 xmlChar *tmp;
10397
Owen Taylor3473f882001-02-23 17:55:21 +000010398 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010399 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10400 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010401 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010402 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010403 return(NULL);
10404 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010405 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010406 }
10407 buf[len++] = cur;
10408 NEXT;
10409 cur = CUR;
10410 if (cur == 0) {
10411 SHRINK;
10412 GROW;
10413 cur = CUR;
10414 }
10415 }
10416 buf[len] = 0;
10417 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010418 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010419 }
10420 return(buf);
10421}
10422
10423/**
10424 * xmlParseEncodingDecl:
10425 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010426 *
Owen Taylor3473f882001-02-23 17:55:21 +000010427 * parse the XML encoding declaration
10428 *
10429 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10430 *
10431 * this setups the conversion filters.
10432 *
10433 * Returns the encoding value or NULL
10434 */
10435
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010436const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010437xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10438 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010439
10440 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010441 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010442 SKIP(8);
10443 SKIP_BLANKS;
10444 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010445 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010446 return(NULL);
10447 }
10448 NEXT;
10449 SKIP_BLANKS;
10450 if (RAW == '"') {
10451 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010452 encoding = xmlParseEncName(ctxt);
10453 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010454 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010455 xmlFree((xmlChar *) encoding);
10456 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010457 } else
10458 NEXT;
10459 } else if (RAW == '\''){
10460 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010461 encoding = xmlParseEncName(ctxt);
10462 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010463 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010464 xmlFree((xmlChar *) encoding);
10465 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010466 } else
10467 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010468 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010469 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010470 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010471
10472 /*
10473 * Non standard parsing, allowing the user to ignore encoding
10474 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010475 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10476 xmlFree((xmlChar *) encoding);
10477 return(NULL);
10478 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010479
Daniel Veillard6b621b82003-08-11 15:03:34 +000010480 /*
10481 * UTF-16 encoding stwich has already taken place at this stage,
10482 * more over the little-endian/big-endian selection is already done
10483 */
10484 if ((encoding != NULL) &&
10485 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10486 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010487 /*
10488 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010489 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010490 * document is apparently UTF-8 compatible, then raise an
10491 * encoding mismatch fatal error
10492 */
10493 if ((ctxt->encoding == NULL) &&
10494 (ctxt->input->buf != NULL) &&
10495 (ctxt->input->buf->encoder == NULL)) {
10496 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10497 "Document labelled UTF-16 but has UTF-8 content\n");
10498 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010499 if (ctxt->encoding != NULL)
10500 xmlFree((xmlChar *) ctxt->encoding);
10501 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010502 }
10503 /*
10504 * UTF-8 encoding is handled natively
10505 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010506 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010507 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10508 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010509 if (ctxt->encoding != NULL)
10510 xmlFree((xmlChar *) ctxt->encoding);
10511 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010512 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010513 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010514 xmlCharEncodingHandlerPtr handler;
10515
10516 if (ctxt->input->encoding != NULL)
10517 xmlFree((xmlChar *) ctxt->input->encoding);
10518 ctxt->input->encoding = encoding;
10519
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010520 handler = xmlFindCharEncodingHandler((const char *) encoding);
10521 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010522 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10523 /* failed to convert */
10524 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10525 return(NULL);
10526 }
Owen Taylor3473f882001-02-23 17:55:21 +000010527 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010528 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010529 "Unsupported encoding %s\n", encoding);
10530 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010531 }
10532 }
10533 }
10534 return(encoding);
10535}
10536
10537/**
10538 * xmlParseSDDecl:
10539 * @ctxt: an XML parser context
10540 *
10541 * parse the XML standalone declaration
10542 *
10543 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010544 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010545 *
10546 * [ VC: Standalone Document Declaration ]
10547 * TODO The standalone document declaration must have the value "no"
10548 * if any external markup declarations contain declarations of:
10549 * - attributes with default values, if elements to which these
10550 * attributes apply appear in the document without specifications
10551 * of values for these attributes, or
10552 * - entities (other than amp, lt, gt, apos, quot), if references
10553 * to those entities appear in the document, or
10554 * - attributes with values subject to normalization, where the
10555 * attribute appears in the document with a value which will change
10556 * as a result of normalization, or
10557 * - element types with element content, if white space occurs directly
10558 * within any instance of those types.
10559 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010560 * Returns:
10561 * 1 if standalone="yes"
10562 * 0 if standalone="no"
10563 * -2 if standalone attribute is missing or invalid
10564 * (A standalone value of -2 means that the XML declaration was found,
10565 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010566 */
10567
10568int
10569xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010570 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010571
10572 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010573 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010574 SKIP(10);
10575 SKIP_BLANKS;
10576 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010577 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010578 return(standalone);
10579 }
10580 NEXT;
10581 SKIP_BLANKS;
10582 if (RAW == '\''){
10583 NEXT;
10584 if ((RAW == 'n') && (NXT(1) == 'o')) {
10585 standalone = 0;
10586 SKIP(2);
10587 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10588 (NXT(2) == 's')) {
10589 standalone = 1;
10590 SKIP(3);
10591 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010592 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010593 }
10594 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010595 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010596 } else
10597 NEXT;
10598 } else if (RAW == '"'){
10599 NEXT;
10600 if ((RAW == 'n') && (NXT(1) == 'o')) {
10601 standalone = 0;
10602 SKIP(2);
10603 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10604 (NXT(2) == 's')) {
10605 standalone = 1;
10606 SKIP(3);
10607 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010608 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010609 }
10610 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010611 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010612 } else
10613 NEXT;
10614 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010615 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010616 }
10617 }
10618 return(standalone);
10619}
10620
10621/**
10622 * xmlParseXMLDecl:
10623 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010624 *
Owen Taylor3473f882001-02-23 17:55:21 +000010625 * parse an XML declaration header
10626 *
10627 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10628 */
10629
10630void
10631xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10632 xmlChar *version;
10633
10634 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010635 * This value for standalone indicates that the document has an
10636 * XML declaration but it does not have a standalone attribute.
10637 * It will be overwritten later if a standalone attribute is found.
10638 */
10639 ctxt->input->standalone = -2;
10640
10641 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010642 * We know that '<?xml' is here.
10643 */
10644 SKIP(5);
10645
William M. Brack76e95df2003-10-18 16:20:14 +000010646 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010647 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10648 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010649 }
10650 SKIP_BLANKS;
10651
10652 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010653 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010654 */
10655 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010656 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010657 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010658 } else {
10659 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10660 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010661 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010662 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010663 if (ctxt->options & XML_PARSE_OLD10) {
10664 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10665 "Unsupported version '%s'\n",
10666 version);
10667 } else {
10668 if ((version[0] == '1') && ((version[1] == '.'))) {
10669 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10670 "Unsupported version '%s'\n",
10671 version, NULL);
10672 } else {
10673 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10674 "Unsupported version '%s'\n",
10675 version);
10676 }
10677 }
Daniel Veillard19840942001-11-29 16:11:38 +000010678 }
10679 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010680 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010681 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010682 }
Owen Taylor3473f882001-02-23 17:55:21 +000010683
10684 /*
10685 * We may have the encoding declaration
10686 */
William M. Brack76e95df2003-10-18 16:20:14 +000010687 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010688 if ((RAW == '?') && (NXT(1) == '>')) {
10689 SKIP(2);
10690 return;
10691 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010692 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010693 }
10694 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010695 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10696 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010697 /*
10698 * The XML REC instructs us to stop parsing right here
10699 */
10700 return;
10701 }
10702
10703 /*
10704 * We may have the standalone status.
10705 */
William M. Brack76e95df2003-10-18 16:20:14 +000010706 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010707 if ((RAW == '?') && (NXT(1) == '>')) {
10708 SKIP(2);
10709 return;
10710 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010711 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010712 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010713
10714 /*
10715 * We can grow the input buffer freely at that point
10716 */
10717 GROW;
10718
Owen Taylor3473f882001-02-23 17:55:21 +000010719 SKIP_BLANKS;
10720 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10721
10722 SKIP_BLANKS;
10723 if ((RAW == '?') && (NXT(1) == '>')) {
10724 SKIP(2);
10725 } else if (RAW == '>') {
10726 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010727 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010728 NEXT;
10729 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010730 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010731 MOVETO_ENDTAG(CUR_PTR);
10732 NEXT;
10733 }
10734}
10735
10736/**
10737 * xmlParseMisc:
10738 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010739 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010740 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010741 *
10742 * [27] Misc ::= Comment | PI | S
10743 */
10744
10745void
10746xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010747 while ((ctxt->instate != XML_PARSER_EOF) &&
10748 (((RAW == '<') && (NXT(1) == '?')) ||
10749 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10750 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010751 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010752 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010753 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010754 NEXT;
10755 } else
10756 xmlParseComment(ctxt);
10757 }
10758}
10759
10760/**
10761 * xmlParseDocument:
10762 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010763 *
Owen Taylor3473f882001-02-23 17:55:21 +000010764 * parse an XML document (and build a tree if using the standard SAX
10765 * interface).
10766 *
10767 * [1] document ::= prolog element Misc*
10768 *
10769 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10770 *
10771 * Returns 0, -1 in case of error. the parser context is augmented
10772 * as a result of the parsing.
10773 */
10774
10775int
10776xmlParseDocument(xmlParserCtxtPtr ctxt) {
10777 xmlChar start[4];
10778 xmlCharEncoding enc;
10779
10780 xmlInitParser();
10781
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010782 if ((ctxt == NULL) || (ctxt->input == NULL))
10783 return(-1);
10784
Owen Taylor3473f882001-02-23 17:55:21 +000010785 GROW;
10786
10787 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010788 * SAX: detecting the level.
10789 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010790 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010791
10792 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010793 * SAX: beginning of the document processing.
10794 */
10795 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10796 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010797 if (ctxt->instate == XML_PARSER_EOF)
10798 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010799
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010800 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010801 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010802 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010803 * Get the 4 first bytes and decode the charset
10804 * if enc != XML_CHAR_ENCODING_NONE
10805 * plug some encoding conversion routines.
10806 */
10807 start[0] = RAW;
10808 start[1] = NXT(1);
10809 start[2] = NXT(2);
10810 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010811 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010812 if (enc != XML_CHAR_ENCODING_NONE) {
10813 xmlSwitchEncoding(ctxt, enc);
10814 }
Owen Taylor3473f882001-02-23 17:55:21 +000010815 }
10816
10817
10818 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010819 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010820 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010821 }
10822
10823 /*
10824 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010825 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010826 * than just the first line, unless the amount of data is really
10827 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010828 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010829 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10830 GROW;
10831 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010832 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010833
10834 /*
10835 * Note that we will switch encoding on the fly.
10836 */
10837 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010838 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10839 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010840 /*
10841 * The XML REC instructs us to stop parsing right here
10842 */
10843 return(-1);
10844 }
10845 ctxt->standalone = ctxt->input->standalone;
10846 SKIP_BLANKS;
10847 } else {
10848 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10849 }
10850 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10851 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010852 if (ctxt->instate == XML_PARSER_EOF)
10853 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010854 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10855 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10856 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10857 }
Owen Taylor3473f882001-02-23 17:55:21 +000010858
10859 /*
10860 * The Misc part of the Prolog
10861 */
10862 GROW;
10863 xmlParseMisc(ctxt);
10864
10865 /*
10866 * Then possibly doc type declaration(s) and more Misc
10867 * (doctypedecl Misc*)?
10868 */
10869 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010870 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010871
10872 ctxt->inSubset = 1;
10873 xmlParseDocTypeDecl(ctxt);
10874 if (RAW == '[') {
10875 ctxt->instate = XML_PARSER_DTD;
10876 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010877 if (ctxt->instate == XML_PARSER_EOF)
10878 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010879 }
10880
10881 /*
10882 * Create and update the external subset.
10883 */
10884 ctxt->inSubset = 2;
10885 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10886 (!ctxt->disableSAX))
10887 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10888 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010889 if (ctxt->instate == XML_PARSER_EOF)
10890 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010891 ctxt->inSubset = 0;
10892
Daniel Veillardac4118d2008-01-11 05:27:32 +000010893 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010894
10895 ctxt->instate = XML_PARSER_PROLOG;
10896 xmlParseMisc(ctxt);
10897 }
10898
10899 /*
10900 * Time to start parsing the tree itself
10901 */
10902 GROW;
10903 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010904 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10905 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010906 } else {
10907 ctxt->instate = XML_PARSER_CONTENT;
10908 xmlParseElement(ctxt);
10909 ctxt->instate = XML_PARSER_EPILOG;
10910
10911
10912 /*
10913 * The Misc part at the end
10914 */
10915 xmlParseMisc(ctxt);
10916
Daniel Veillard561b7f82002-03-20 21:55:57 +000010917 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010918 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010919 }
10920 ctxt->instate = XML_PARSER_EOF;
10921 }
10922
10923 /*
10924 * SAX: end of the document processing.
10925 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010926 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010927 ctxt->sax->endDocument(ctxt->userData);
10928
Daniel Veillard5997aca2002-03-18 18:36:20 +000010929 /*
10930 * Remove locally kept entity definitions if the tree was not built
10931 */
10932 if ((ctxt->myDoc != NULL) &&
10933 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10934 xmlFreeDoc(ctxt->myDoc);
10935 ctxt->myDoc = NULL;
10936 }
10937
Daniel Veillardae0765b2008-07-31 19:54:59 +000010938 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10939 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10940 if (ctxt->valid)
10941 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10942 if (ctxt->nsWellFormed)
10943 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10944 if (ctxt->options & XML_PARSE_OLD10)
10945 ctxt->myDoc->properties |= XML_DOC_OLD10;
10946 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010947 if (! ctxt->wellFormed) {
10948 ctxt->valid = 0;
10949 return(-1);
10950 }
Owen Taylor3473f882001-02-23 17:55:21 +000010951 return(0);
10952}
10953
10954/**
10955 * xmlParseExtParsedEnt:
10956 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010957 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010958 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010959 * An external general parsed entity is well-formed if it matches the
10960 * production labeled extParsedEnt.
10961 *
10962 * [78] extParsedEnt ::= TextDecl? content
10963 *
10964 * Returns 0, -1 in case of error. the parser context is augmented
10965 * as a result of the parsing.
10966 */
10967
10968int
10969xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10970 xmlChar start[4];
10971 xmlCharEncoding enc;
10972
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010973 if ((ctxt == NULL) || (ctxt->input == NULL))
10974 return(-1);
10975
Owen Taylor3473f882001-02-23 17:55:21 +000010976 xmlDefaultSAXHandlerInit();
10977
Daniel Veillard309f81d2003-09-23 09:02:53 +000010978 xmlDetectSAX2(ctxt);
10979
Owen Taylor3473f882001-02-23 17:55:21 +000010980 GROW;
10981
10982 /*
10983 * SAX: beginning of the document processing.
10984 */
10985 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10986 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10987
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010988 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010989 * Get the 4 first bytes and decode the charset
10990 * if enc != XML_CHAR_ENCODING_NONE
10991 * plug some encoding conversion routines.
10992 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010993 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10994 start[0] = RAW;
10995 start[1] = NXT(1);
10996 start[2] = NXT(2);
10997 start[3] = NXT(3);
10998 enc = xmlDetectCharEncoding(start, 4);
10999 if (enc != XML_CHAR_ENCODING_NONE) {
11000 xmlSwitchEncoding(ctxt, enc);
11001 }
Owen Taylor3473f882001-02-23 17:55:21 +000011002 }
11003
11004
11005 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011006 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011007 }
11008
11009 /*
11010 * Check for the XMLDecl in the Prolog.
11011 */
11012 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000011013 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011014
11015 /*
11016 * Note that we will switch encoding on the fly.
11017 */
11018 xmlParseXMLDecl(ctxt);
11019 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11020 /*
11021 * The XML REC instructs us to stop parsing right here
11022 */
11023 return(-1);
11024 }
11025 SKIP_BLANKS;
11026 } else {
11027 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11028 }
11029 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11030 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011031 if (ctxt->instate == XML_PARSER_EOF)
11032 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000011033
11034 /*
11035 * Doing validity checking on chunk doesn't make sense
11036 */
11037 ctxt->instate = XML_PARSER_CONTENT;
11038 ctxt->validate = 0;
11039 ctxt->loadsubset = 0;
11040 ctxt->depth = 0;
11041
11042 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011043 if (ctxt->instate == XML_PARSER_EOF)
11044 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011045
Owen Taylor3473f882001-02-23 17:55:21 +000011046 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011047 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011048 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011049 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011050 }
11051
11052 /*
11053 * SAX: end of the document processing.
11054 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011055 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011056 ctxt->sax->endDocument(ctxt->userData);
11057
11058 if (! ctxt->wellFormed) return(-1);
11059 return(0);
11060}
11061
Daniel Veillard73b013f2003-09-30 12:36:01 +000011062#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011063/************************************************************************
11064 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011065 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000011066 * *
11067 ************************************************************************/
11068
11069/**
11070 * xmlParseLookupSequence:
11071 * @ctxt: an XML parser context
11072 * @first: the first char to lookup
11073 * @next: the next char to lookup or zero
11074 * @third: the next char to lookup or zero
11075 *
11076 * Try to find if a sequence (first, next, third) or just (first next) or
11077 * (first) is available in the input stream.
11078 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11079 * to avoid rescanning sequences of bytes, it DOES change the state of the
11080 * parser, do not use liberally.
11081 *
11082 * Returns the index to the current parsing point if the full sequence
11083 * is available, -1 otherwise.
11084 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011085static int
Owen Taylor3473f882001-02-23 17:55:21 +000011086xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11087 xmlChar next, xmlChar third) {
11088 int base, len;
11089 xmlParserInputPtr in;
11090 const xmlChar *buf;
11091
11092 in = ctxt->input;
11093 if (in == NULL) return(-1);
11094 base = in->cur - in->base;
11095 if (base < 0) return(-1);
11096 if (ctxt->checkIndex > base)
11097 base = ctxt->checkIndex;
11098 if (in->buf == NULL) {
11099 buf = in->base;
11100 len = in->length;
11101 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011102 buf = xmlBufContent(in->buf->buffer);
11103 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000011104 }
11105 /* take into account the sequence length */
11106 if (third) len -= 2;
11107 else if (next) len --;
11108 for (;base < len;base++) {
11109 if (buf[base] == first) {
11110 if (third != 0) {
11111 if ((buf[base + 1] != next) ||
11112 (buf[base + 2] != third)) continue;
11113 } else if (next != 0) {
11114 if (buf[base + 1] != next) continue;
11115 }
11116 ctxt->checkIndex = 0;
11117#ifdef DEBUG_PUSH
11118 if (next == 0)
11119 xmlGenericError(xmlGenericErrorContext,
11120 "PP: lookup '%c' found at %d\n",
11121 first, base);
11122 else if (third == 0)
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: lookup '%c%c' found at %d\n",
11125 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011126 else
Owen Taylor3473f882001-02-23 17:55:21 +000011127 xmlGenericError(xmlGenericErrorContext,
11128 "PP: lookup '%c%c%c' found at %d\n",
11129 first, next, third, base);
11130#endif
11131 return(base - (in->cur - in->base));
11132 }
11133 }
11134 ctxt->checkIndex = base;
11135#ifdef DEBUG_PUSH
11136 if (next == 0)
11137 xmlGenericError(xmlGenericErrorContext,
11138 "PP: lookup '%c' failed\n", first);
11139 else if (third == 0)
11140 xmlGenericError(xmlGenericErrorContext,
11141 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011142 else
Owen Taylor3473f882001-02-23 17:55:21 +000011143 xmlGenericError(xmlGenericErrorContext,
11144 "PP: lookup '%c%c%c' failed\n", first, next, third);
11145#endif
11146 return(-1);
11147}
11148
11149/**
Daniel Veillarda880b122003-04-21 21:36:41 +000011150 * xmlParseGetLasts:
11151 * @ctxt: an XML parser context
11152 * @lastlt: pointer to store the last '<' from the input
11153 * @lastgt: pointer to store the last '>' from the input
11154 *
11155 * Lookup the last < and > in the current chunk
11156 */
11157static void
11158xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11159 const xmlChar **lastgt) {
11160 const xmlChar *tmp;
11161
11162 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11163 xmlGenericError(xmlGenericErrorContext,
11164 "Internal error: xmlParseGetLasts\n");
11165 return;
11166 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011167 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011168 tmp = ctxt->input->end;
11169 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000011170 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000011171 if (tmp < ctxt->input->base) {
11172 *lastlt = NULL;
11173 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011174 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011175 *lastlt = tmp;
11176 tmp++;
11177 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11178 if (*tmp == '\'') {
11179 tmp++;
11180 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11181 if (tmp < ctxt->input->end) tmp++;
11182 } else if (*tmp == '"') {
11183 tmp++;
11184 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11185 if (tmp < ctxt->input->end) tmp++;
11186 } else
11187 tmp++;
11188 }
11189 if (tmp < ctxt->input->end)
11190 *lastgt = tmp;
11191 else {
11192 tmp = *lastlt;
11193 tmp--;
11194 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11195 if (tmp >= ctxt->input->base)
11196 *lastgt = tmp;
11197 else
11198 *lastgt = NULL;
11199 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011200 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011201 } else {
11202 *lastlt = NULL;
11203 *lastgt = NULL;
11204 }
11205}
11206/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011207 * xmlCheckCdataPush:
David Kilzer4f8606c2016-01-05 13:38:09 -080011208 * @cur: pointer to the block of characters
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011209 * @len: length of the block in bytes
David Kilzer4f8606c2016-01-05 13:38:09 -080011210 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011211 *
11212 * Check that the block of characters is okay as SCdata content [20]
11213 *
11214 * Returns the number of bytes to pass if okay, a negative index where an
11215 * UTF-8 error occured otherwise
11216 */
11217static int
David Kilzer4f8606c2016-01-05 13:38:09 -080011218xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011219 int ix;
11220 unsigned char c;
11221 int codepoint;
11222
11223 if ((utf == NULL) || (len <= 0))
11224 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011225
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011226 for (ix = 0; ix < len;) { /* string is 0-terminated */
11227 c = utf[ix];
11228 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11229 if (c >= 0x20)
11230 ix++;
11231 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11232 ix++;
11233 else
11234 return(-ix);
11235 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011236 if (ix + 2 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011237 if ((utf[ix+1] & 0xc0 ) != 0x80)
11238 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011239 codepoint = (utf[ix] & 0x1f) << 6;
11240 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011241 if (!xmlIsCharQ(codepoint))
11242 return(-ix);
11243 ix += 2;
11244 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011245 if (ix + 3 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011246 if (((utf[ix+1] & 0xc0) != 0x80) ||
11247 ((utf[ix+2] & 0xc0) != 0x80))
11248 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011249 codepoint = (utf[ix] & 0xf) << 12;
11250 codepoint |= (utf[ix+1] & 0x3f) << 6;
11251 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011252 if (!xmlIsCharQ(codepoint))
11253 return(-ix);
11254 ix += 3;
11255 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011256 if (ix + 4 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011257 if (((utf[ix+1] & 0xc0) != 0x80) ||
11258 ((utf[ix+2] & 0xc0) != 0x80) ||
11259 ((utf[ix+3] & 0xc0) != 0x80))
11260 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011261 codepoint = (utf[ix] & 0x7) << 18;
11262 codepoint |= (utf[ix+1] & 0x3f) << 12;
11263 codepoint |= (utf[ix+2] & 0x3f) << 6;
11264 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011265 if (!xmlIsCharQ(codepoint))
11266 return(-ix);
11267 ix += 4;
11268 } else /* unknown encoding */
11269 return(-ix);
11270 }
11271 return(ix);
11272}
11273
11274/**
Owen Taylor3473f882001-02-23 17:55:21 +000011275 * xmlParseTryOrFinish:
11276 * @ctxt: an XML parser context
11277 * @terminate: last chunk indicator
11278 *
11279 * Try to progress on parsing
11280 *
11281 * Returns zero if no parsing was possible
11282 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011283static int
Owen Taylor3473f882001-02-23 17:55:21 +000011284xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11285 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011286 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011287 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011288 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011289
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011290 if (ctxt->input == NULL)
11291 return(0);
11292
Owen Taylor3473f882001-02-23 17:55:21 +000011293#ifdef DEBUG_PUSH
11294 switch (ctxt->instate) {
11295 case XML_PARSER_EOF:
11296 xmlGenericError(xmlGenericErrorContext,
11297 "PP: try EOF\n"); break;
11298 case XML_PARSER_START:
11299 xmlGenericError(xmlGenericErrorContext,
11300 "PP: try START\n"); break;
11301 case XML_PARSER_MISC:
11302 xmlGenericError(xmlGenericErrorContext,
11303 "PP: try MISC\n");break;
11304 case XML_PARSER_COMMENT:
11305 xmlGenericError(xmlGenericErrorContext,
11306 "PP: try COMMENT\n");break;
11307 case XML_PARSER_PROLOG:
11308 xmlGenericError(xmlGenericErrorContext,
11309 "PP: try PROLOG\n");break;
11310 case XML_PARSER_START_TAG:
11311 xmlGenericError(xmlGenericErrorContext,
11312 "PP: try START_TAG\n");break;
11313 case XML_PARSER_CONTENT:
11314 xmlGenericError(xmlGenericErrorContext,
11315 "PP: try CONTENT\n");break;
11316 case XML_PARSER_CDATA_SECTION:
11317 xmlGenericError(xmlGenericErrorContext,
11318 "PP: try CDATA_SECTION\n");break;
11319 case XML_PARSER_END_TAG:
11320 xmlGenericError(xmlGenericErrorContext,
11321 "PP: try END_TAG\n");break;
11322 case XML_PARSER_ENTITY_DECL:
11323 xmlGenericError(xmlGenericErrorContext,
11324 "PP: try ENTITY_DECL\n");break;
11325 case XML_PARSER_ENTITY_VALUE:
11326 xmlGenericError(xmlGenericErrorContext,
11327 "PP: try ENTITY_VALUE\n");break;
11328 case XML_PARSER_ATTRIBUTE_VALUE:
11329 xmlGenericError(xmlGenericErrorContext,
11330 "PP: try ATTRIBUTE_VALUE\n");break;
11331 case XML_PARSER_DTD:
11332 xmlGenericError(xmlGenericErrorContext,
11333 "PP: try DTD\n");break;
11334 case XML_PARSER_EPILOG:
11335 xmlGenericError(xmlGenericErrorContext,
11336 "PP: try EPILOG\n");break;
11337 case XML_PARSER_PI:
11338 xmlGenericError(xmlGenericErrorContext,
11339 "PP: try PI\n");break;
11340 case XML_PARSER_IGNORE:
11341 xmlGenericError(xmlGenericErrorContext,
11342 "PP: try IGNORE\n");break;
11343 }
11344#endif
11345
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011346 if ((ctxt->input != NULL) &&
11347 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011348 xmlSHRINK(ctxt);
11349 ctxt->checkIndex = 0;
11350 }
11351 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011352
Daniel Veillarde50ba812013-04-11 15:54:51 +080011353 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011354 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011355 return(0);
11356
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011357
Owen Taylor3473f882001-02-23 17:55:21 +000011358 /*
11359 * Pop-up of finished entities.
11360 */
11361 while ((RAW == 0) && (ctxt->inputNr > 1))
11362 xmlPopInput(ctxt);
11363
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011364 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011365 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011366 avail = ctxt->input->length -
11367 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011368 else {
11369 /*
11370 * If we are operating on converted input, try to flush
11371 * remaining chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011372 * buffer. But do not do this in document start where
11373 * encoding="..." may not have been read and we work on a
11374 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011375 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011376 if ((ctxt->instate != XML_PARSER_START) &&
11377 (ctxt->input->buf->raw != NULL) &&
11378 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011379 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11380 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011381 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011382
11383 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011384 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11385 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011386 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011387 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011388 (ctxt->input->cur - ctxt->input->base);
11389 }
Owen Taylor3473f882001-02-23 17:55:21 +000011390 if (avail < 1)
11391 goto done;
11392 switch (ctxt->instate) {
11393 case XML_PARSER_EOF:
11394 /*
11395 * Document parsing is done !
11396 */
11397 goto done;
11398 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011399 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11400 xmlChar start[4];
11401 xmlCharEncoding enc;
11402
11403 /*
11404 * Very first chars read from the document flow.
11405 */
11406 if (avail < 4)
11407 goto done;
11408
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011409 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011410 * Get the 4 first bytes and decode the charset
11411 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011412 * plug some encoding conversion routines,
11413 * else xmlSwitchEncoding will set to (default)
11414 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011415 */
11416 start[0] = RAW;
11417 start[1] = NXT(1);
11418 start[2] = NXT(2);
11419 start[3] = NXT(3);
11420 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011421 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011422 break;
11423 }
Owen Taylor3473f882001-02-23 17:55:21 +000011424
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011425 if (avail < 2)
11426 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011427 cur = ctxt->input->cur[0];
11428 next = ctxt->input->cur[1];
11429 if (cur == 0) {
11430 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11431 ctxt->sax->setDocumentLocator(ctxt->userData,
11432 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011433 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011434 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011435#ifdef DEBUG_PUSH
11436 xmlGenericError(xmlGenericErrorContext,
11437 "PP: entering EOF\n");
11438#endif
11439 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11440 ctxt->sax->endDocument(ctxt->userData);
11441 goto done;
11442 }
11443 if ((cur == '<') && (next == '?')) {
11444 /* PI or XML decl */
11445 if (avail < 5) return(ret);
11446 if ((!terminate) &&
11447 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11448 return(ret);
11449 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11450 ctxt->sax->setDocumentLocator(ctxt->userData,
11451 &xmlDefaultSAXLocator);
11452 if ((ctxt->input->cur[2] == 'x') &&
11453 (ctxt->input->cur[3] == 'm') &&
11454 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011455 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011456 ret += 5;
11457#ifdef DEBUG_PUSH
11458 xmlGenericError(xmlGenericErrorContext,
11459 "PP: Parsing XML Decl\n");
11460#endif
11461 xmlParseXMLDecl(ctxt);
11462 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11463 /*
11464 * The XML REC instructs us to stop parsing right
11465 * here
11466 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011467 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011468 return(0);
11469 }
11470 ctxt->standalone = ctxt->input->standalone;
11471 if ((ctxt->encoding == NULL) &&
11472 (ctxt->input->encoding != NULL))
11473 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11474 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11475 (!ctxt->disableSAX))
11476 ctxt->sax->startDocument(ctxt->userData);
11477 ctxt->instate = XML_PARSER_MISC;
11478#ifdef DEBUG_PUSH
11479 xmlGenericError(xmlGenericErrorContext,
11480 "PP: entering MISC\n");
11481#endif
11482 } else {
11483 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11484 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11485 (!ctxt->disableSAX))
11486 ctxt->sax->startDocument(ctxt->userData);
11487 ctxt->instate = XML_PARSER_MISC;
11488#ifdef DEBUG_PUSH
11489 xmlGenericError(xmlGenericErrorContext,
11490 "PP: entering MISC\n");
11491#endif
11492 }
11493 } else {
11494 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11495 ctxt->sax->setDocumentLocator(ctxt->userData,
11496 &xmlDefaultSAXLocator);
11497 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011498 if (ctxt->version == NULL) {
11499 xmlErrMemory(ctxt, NULL);
11500 break;
11501 }
Owen Taylor3473f882001-02-23 17:55:21 +000011502 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11503 (!ctxt->disableSAX))
11504 ctxt->sax->startDocument(ctxt->userData);
11505 ctxt->instate = XML_PARSER_MISC;
11506#ifdef DEBUG_PUSH
11507 xmlGenericError(xmlGenericErrorContext,
11508 "PP: entering MISC\n");
11509#endif
11510 }
11511 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011512 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011513 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011514 const xmlChar *prefix = NULL;
11515 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011516 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011517
11518 if ((avail < 2) && (ctxt->inputNr == 1))
11519 goto done;
11520 cur = ctxt->input->cur[0];
11521 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011522 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011523 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011524 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11525 ctxt->sax->endDocument(ctxt->userData);
11526 goto done;
11527 }
11528 if (!terminate) {
11529 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011530 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011531 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011532 goto done;
11533 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11534 goto done;
11535 }
11536 }
11537 if (ctxt->spaceNr == 0)
11538 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011539 else if (*ctxt->space == -2)
11540 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011541 else
11542 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011543#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011544 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011545#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011546 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011547#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011548 else
11549 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011550#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011551 if (ctxt->instate == XML_PARSER_EOF)
11552 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011553 if (name == NULL) {
11554 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011555 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011556 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11557 ctxt->sax->endDocument(ctxt->userData);
11558 goto done;
11559 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011560#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011561 /*
11562 * [ VC: Root Element Type ]
11563 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011564 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011565 */
11566 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11567 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11568 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011569#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011570
11571 /*
11572 * Check for an Empty Element.
11573 */
11574 if ((RAW == '/') && (NXT(1) == '>')) {
11575 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011576
11577 if (ctxt->sax2) {
11578 if ((ctxt->sax != NULL) &&
11579 (ctxt->sax->endElementNs != NULL) &&
11580 (!ctxt->disableSAX))
11581 ctxt->sax->endElementNs(ctxt->userData, name,
11582 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011583 if (ctxt->nsNr - nsNr > 0)
11584 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011585#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011586 } else {
11587 if ((ctxt->sax != NULL) &&
11588 (ctxt->sax->endElement != NULL) &&
11589 (!ctxt->disableSAX))
11590 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011591#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011592 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011593 if (ctxt->instate == XML_PARSER_EOF)
11594 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011595 spacePop(ctxt);
11596 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011597 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011598 } else {
11599 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011600 }
Daniel Veillard65686452012-07-19 18:25:01 +080011601 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011602 break;
11603 }
11604 if (RAW == '>') {
11605 NEXT;
11606 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011607 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011608 "Couldn't find end of Start Tag %s\n",
11609 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011610 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011611 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011612 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011613 if (ctxt->sax2)
11614 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011615#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011616 else
11617 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011618#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011619
Daniel Veillarda880b122003-04-21 21:36:41 +000011620 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011621 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011622 break;
11623 }
11624 case XML_PARSER_CONTENT: {
11625 const xmlChar *test;
11626 unsigned int cons;
11627 if ((avail < 2) && (ctxt->inputNr == 1))
11628 goto done;
11629 cur = ctxt->input->cur[0];
11630 next = ctxt->input->cur[1];
11631
11632 test = CUR_PTR;
11633 cons = ctxt->input->consumed;
11634 if ((cur == '<') && (next == '/')) {
11635 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011636 break;
11637 } else if ((cur == '<') && (next == '?')) {
11638 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011639 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11640 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011641 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011642 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011643 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011644 ctxt->instate = XML_PARSER_CONTENT;
11645 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011646 } else if ((cur == '<') && (next != '!')) {
11647 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011648 break;
11649 } else if ((cur == '<') && (next == '!') &&
11650 (ctxt->input->cur[2] == '-') &&
11651 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011652 int term;
11653
11654 if (avail < 4)
11655 goto done;
11656 ctxt->input->cur += 4;
11657 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11658 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011659 if ((!terminate) && (term < 0)) {
11660 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011661 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011662 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011663 xmlParseComment(ctxt);
11664 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011665 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011666 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11667 (ctxt->input->cur[2] == '[') &&
11668 (ctxt->input->cur[3] == 'C') &&
11669 (ctxt->input->cur[4] == 'D') &&
11670 (ctxt->input->cur[5] == 'A') &&
11671 (ctxt->input->cur[6] == 'T') &&
11672 (ctxt->input->cur[7] == 'A') &&
11673 (ctxt->input->cur[8] == '[')) {
11674 SKIP(9);
11675 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011676 break;
11677 } else if ((cur == '<') && (next == '!') &&
11678 (avail < 9)) {
11679 goto done;
11680 } else if (cur == '&') {
11681 if ((!terminate) &&
11682 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11683 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011684 xmlParseReference(ctxt);
11685 } else {
11686 /* TODO Avoid the extra copy, handle directly !!! */
11687 /*
11688 * Goal of the following test is:
11689 * - minimize calls to the SAX 'character' callback
11690 * when they are mergeable
11691 * - handle a problem for isBlank when we only parse
11692 * a sequence of blank chars and the next one is
11693 * not available to check against '<' presence.
11694 * - tries to homogenize the differences in SAX
11695 * callbacks between the push and pull versions
11696 * of the parser.
11697 */
11698 if ((ctxt->inputNr == 1) &&
11699 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11700 if (!terminate) {
11701 if (ctxt->progressive) {
11702 if ((lastlt == NULL) ||
11703 (ctxt->input->cur > lastlt))
11704 goto done;
11705 } else if (xmlParseLookupSequence(ctxt,
11706 '<', 0, 0) < 0) {
11707 goto done;
11708 }
11709 }
11710 }
11711 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011712 xmlParseCharData(ctxt, 0);
11713 }
11714 /*
11715 * Pop-up of finished entities.
11716 */
11717 while ((RAW == 0) && (ctxt->inputNr > 1))
11718 xmlPopInput(ctxt);
11719 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011720 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11721 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011722 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011723 break;
11724 }
11725 break;
11726 }
11727 case XML_PARSER_END_TAG:
11728 if (avail < 2)
11729 goto done;
11730 if (!terminate) {
11731 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011732 /* > can be found unescaped in attribute values */
11733 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011734 goto done;
11735 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11736 goto done;
11737 }
11738 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011739 if (ctxt->sax2) {
11740 xmlParseEndTag2(ctxt,
11741 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11742 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011743 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011744 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011745 }
11746#ifdef LIBXML_SAX1_ENABLED
11747 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011748 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011749#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011750 if (ctxt->instate == XML_PARSER_EOF) {
11751 /* Nothing */
11752 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011753 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011754 } else {
11755 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011756 }
11757 break;
11758 case XML_PARSER_CDATA_SECTION: {
11759 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011760 * The Push mode needs to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011761 * cdataBlock merge back contiguous callbacks.
11762 */
11763 int base;
11764
11765 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11766 if (base < 0) {
11767 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011768 int tmp;
11769
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011770 tmp = xmlCheckCdataPush(ctxt->input->cur,
David Kilzer4f8606c2016-01-05 13:38:09 -080011771 XML_PARSER_BIG_BUFFER_SIZE, 0);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011772 if (tmp < 0) {
11773 tmp = -tmp;
11774 ctxt->input->cur += tmp;
11775 goto encoding_error;
11776 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011777 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11778 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011779 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011780 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011781 else if (ctxt->sax->characters != NULL)
11782 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011783 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011784 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011785 if (ctxt->instate == XML_PARSER_EOF)
11786 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011787 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011788 ctxt->checkIndex = 0;
11789 }
11790 goto done;
11791 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011792 int tmp;
11793
David Kilzer4f8606c2016-01-05 13:38:09 -080011794 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011795 if ((tmp < 0) || (tmp != base)) {
11796 tmp = -tmp;
11797 ctxt->input->cur += tmp;
11798 goto encoding_error;
11799 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011800 if ((ctxt->sax != NULL) && (base == 0) &&
11801 (ctxt->sax->cdataBlock != NULL) &&
11802 (!ctxt->disableSAX)) {
11803 /*
11804 * Special case to provide identical behaviour
11805 * between pull and push parsers on empty CDATA
11806 * sections
11807 */
11808 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11809 (!strncmp((const char *)&ctxt->input->cur[-9],
11810 "<![CDATA[", 9)))
11811 ctxt->sax->cdataBlock(ctxt->userData,
11812 BAD_CAST "", 0);
11813 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011814 (!ctxt->disableSAX)) {
11815 if (ctxt->sax->cdataBlock != NULL)
11816 ctxt->sax->cdataBlock(ctxt->userData,
11817 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011818 else if (ctxt->sax->characters != NULL)
11819 ctxt->sax->characters(ctxt->userData,
11820 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011821 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011822 if (ctxt->instate == XML_PARSER_EOF)
11823 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011824 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011825 ctxt->checkIndex = 0;
11826 ctxt->instate = XML_PARSER_CONTENT;
11827#ifdef DEBUG_PUSH
11828 xmlGenericError(xmlGenericErrorContext,
11829 "PP: entering CONTENT\n");
11830#endif
11831 }
11832 break;
11833 }
Owen Taylor3473f882001-02-23 17:55:21 +000011834 case XML_PARSER_MISC:
11835 SKIP_BLANKS;
11836 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011837 avail = ctxt->input->length -
11838 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011839 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011840 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011841 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011842 if (avail < 2)
11843 goto done;
11844 cur = ctxt->input->cur[0];
11845 next = ctxt->input->cur[1];
11846 if ((cur == '<') && (next == '?')) {
11847 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011848 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11849 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011850 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011851 }
Owen Taylor3473f882001-02-23 17:55:21 +000011852#ifdef DEBUG_PUSH
11853 xmlGenericError(xmlGenericErrorContext,
11854 "PP: Parsing PI\n");
11855#endif
11856 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011857 if (ctxt->instate == XML_PARSER_EOF)
11858 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011859 ctxt->instate = XML_PARSER_MISC;
11860 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011861 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011862 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011863 (ctxt->input->cur[2] == '-') &&
11864 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011865 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011866 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11867 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011868 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011869 }
Owen Taylor3473f882001-02-23 17:55:21 +000011870#ifdef DEBUG_PUSH
11871 xmlGenericError(xmlGenericErrorContext,
11872 "PP: Parsing Comment\n");
11873#endif
11874 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011875 if (ctxt->instate == XML_PARSER_EOF)
11876 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011877 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011878 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011879 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011880 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011881 (ctxt->input->cur[2] == 'D') &&
11882 (ctxt->input->cur[3] == 'O') &&
11883 (ctxt->input->cur[4] == 'C') &&
11884 (ctxt->input->cur[5] == 'T') &&
11885 (ctxt->input->cur[6] == 'Y') &&
11886 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011887 (ctxt->input->cur[8] == 'E')) {
11888 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011889 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11890 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011891 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011892 }
Owen Taylor3473f882001-02-23 17:55:21 +000011893#ifdef DEBUG_PUSH
11894 xmlGenericError(xmlGenericErrorContext,
11895 "PP: Parsing internal subset\n");
11896#endif
11897 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011898 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011899 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011900 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011901 if (ctxt->instate == XML_PARSER_EOF)
11902 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011903 if (RAW == '[') {
11904 ctxt->instate = XML_PARSER_DTD;
11905#ifdef DEBUG_PUSH
11906 xmlGenericError(xmlGenericErrorContext,
11907 "PP: entering DTD\n");
11908#endif
11909 } else {
11910 /*
11911 * Create and update the external subset.
11912 */
11913 ctxt->inSubset = 2;
11914 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11915 (ctxt->sax->externalSubset != NULL))
11916 ctxt->sax->externalSubset(ctxt->userData,
11917 ctxt->intSubName, ctxt->extSubSystem,
11918 ctxt->extSubURI);
11919 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011920 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011921 ctxt->instate = XML_PARSER_PROLOG;
11922#ifdef DEBUG_PUSH
11923 xmlGenericError(xmlGenericErrorContext,
11924 "PP: entering PROLOG\n");
11925#endif
11926 }
11927 } else if ((cur == '<') && (next == '!') &&
11928 (avail < 9)) {
11929 goto done;
11930 } else {
11931 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011932 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011933 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011934#ifdef DEBUG_PUSH
11935 xmlGenericError(xmlGenericErrorContext,
11936 "PP: entering START_TAG\n");
11937#endif
11938 }
11939 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011940 case XML_PARSER_PROLOG:
11941 SKIP_BLANKS;
11942 if (ctxt->input->buf == NULL)
11943 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11944 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011945 avail = xmlBufUse(ctxt->input->buf->buffer) -
11946 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011947 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011948 goto done;
11949 cur = ctxt->input->cur[0];
11950 next = ctxt->input->cur[1];
11951 if ((cur == '<') && (next == '?')) {
11952 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011953 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11954 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011955 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011956 }
Owen Taylor3473f882001-02-23 17:55:21 +000011957#ifdef DEBUG_PUSH
11958 xmlGenericError(xmlGenericErrorContext,
11959 "PP: Parsing PI\n");
11960#endif
11961 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011962 if (ctxt->instate == XML_PARSER_EOF)
11963 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011964 ctxt->instate = XML_PARSER_PROLOG;
11965 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011966 } else if ((cur == '<') && (next == '!') &&
11967 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11968 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011969 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11970 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011971 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011972 }
Owen Taylor3473f882001-02-23 17:55:21 +000011973#ifdef DEBUG_PUSH
11974 xmlGenericError(xmlGenericErrorContext,
11975 "PP: Parsing Comment\n");
11976#endif
11977 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011978 if (ctxt->instate == XML_PARSER_EOF)
11979 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011980 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011981 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011982 } else if ((cur == '<') && (next == '!') &&
11983 (avail < 4)) {
11984 goto done;
11985 } else {
11986 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011987 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011988 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011989 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011990#ifdef DEBUG_PUSH
11991 xmlGenericError(xmlGenericErrorContext,
11992 "PP: entering START_TAG\n");
11993#endif
11994 }
11995 break;
11996 case XML_PARSER_EPILOG:
11997 SKIP_BLANKS;
11998 if (ctxt->input->buf == NULL)
11999 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
12000 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012001 avail = xmlBufUse(ctxt->input->buf->buffer) -
12002 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000012003 if (avail < 2)
12004 goto done;
12005 cur = ctxt->input->cur[0];
12006 next = ctxt->input->cur[1];
12007 if ((cur == '<') && (next == '?')) {
12008 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080012009 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12010 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000012011 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080012012 }
Owen Taylor3473f882001-02-23 17:55:21 +000012013#ifdef DEBUG_PUSH
12014 xmlGenericError(xmlGenericErrorContext,
12015 "PP: Parsing PI\n");
12016#endif
12017 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012018 if (ctxt->instate == XML_PARSER_EOF)
12019 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012020 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080012021 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012022 } else if ((cur == '<') && (next == '!') &&
12023 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12024 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080012025 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12026 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012027 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080012028 }
Owen Taylor3473f882001-02-23 17:55:21 +000012029#ifdef DEBUG_PUSH
12030 xmlGenericError(xmlGenericErrorContext,
12031 "PP: Parsing Comment\n");
12032#endif
12033 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012034 if (ctxt->instate == XML_PARSER_EOF)
12035 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012036 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080012037 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012038 } else if ((cur == '<') && (next == '!') &&
12039 (avail < 4)) {
12040 goto done;
12041 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012042 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080012043 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012044#ifdef DEBUG_PUSH
12045 xmlGenericError(xmlGenericErrorContext,
12046 "PP: entering EOF\n");
12047#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012048 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012049 ctxt->sax->endDocument(ctxt->userData);
12050 goto done;
12051 }
12052 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012053 case XML_PARSER_DTD: {
12054 /*
12055 * Sorry but progressive parsing of the internal subset
12056 * is not expected to be supported. We first check that
12057 * the full content of the internal subset is available and
12058 * the parsing is launched only at that point.
12059 * Internal subset ends up with "']' S? '>'" in an unescaped
12060 * section and not in a ']]>' sequence which are conditional
12061 * sections (whoever argued to keep that crap in XML deserve
12062 * a place in hell !).
12063 */
12064 int base, i;
12065 xmlChar *buf;
12066 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012067 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000012068
12069 base = ctxt->input->cur - ctxt->input->base;
12070 if (base < 0) return(0);
12071 if (ctxt->checkIndex > base)
12072 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012073 buf = xmlBufContent(ctxt->input->buf->buffer);
12074 use = xmlBufUse(ctxt->input->buf->buffer);
12075 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000012076 if (quote != 0) {
12077 if (buf[base] == quote)
12078 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012079 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000012080 }
Daniel Veillard036143b2004-02-12 11:57:52 +000012081 if ((quote == 0) && (buf[base] == '<')) {
12082 int found = 0;
12083 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012084 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000012085 (buf[base + 1] == '!') &&
12086 (buf[base + 2] == '-') &&
12087 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012088 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000012089 if ((buf[base] == '-') &&
12090 (buf[base + 1] == '-') &&
12091 (buf[base + 2] == '>')) {
12092 found = 1;
12093 base += 2;
12094 break;
12095 }
12096 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012097 if (!found) {
12098#if 0
12099 fprintf(stderr, "unfinished comment\n");
12100#endif
12101 break; /* for */
12102 }
Daniel Veillard036143b2004-02-12 11:57:52 +000012103 continue;
12104 }
12105 }
Owen Taylor3473f882001-02-23 17:55:21 +000012106 if (buf[base] == '"') {
12107 quote = '"';
12108 continue;
12109 }
12110 if (buf[base] == '\'') {
12111 quote = '\'';
12112 continue;
12113 }
12114 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012115#if 0
12116 fprintf(stderr, "%c%c%c%c: ", buf[base],
12117 buf[base + 1], buf[base + 2], buf[base + 3]);
12118#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012119 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000012120 break;
12121 if (buf[base + 1] == ']') {
12122 /* conditional crap, skip both ']' ! */
12123 base++;
12124 continue;
12125 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012126 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012127 if (buf[base + i] == '>') {
12128#if 0
12129 fprintf(stderr, "found\n");
12130#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012131 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012132 }
12133 if (!IS_BLANK_CH(buf[base + i])) {
12134#if 0
12135 fprintf(stderr, "not found\n");
12136#endif
12137 goto not_end_of_int_subset;
12138 }
Owen Taylor3473f882001-02-23 17:55:21 +000012139 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012140#if 0
12141 fprintf(stderr, "end of stream\n");
12142#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012143 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012144
Owen Taylor3473f882001-02-23 17:55:21 +000012145 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012146not_end_of_int_subset:
12147 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000012148 }
12149 /*
12150 * We didn't found the end of the Internal subset
12151 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012152 if (quote == 0)
12153 ctxt->checkIndex = base;
12154 else
12155 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012156#ifdef DEBUG_PUSH
12157 if (next == 0)
12158 xmlGenericError(xmlGenericErrorContext,
12159 "PP: lookup of int subset end filed\n");
12160#endif
12161 goto done;
12162
12163found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012164 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012165 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012166 if (ctxt->instate == XML_PARSER_EOF)
12167 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012168 ctxt->inSubset = 2;
12169 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12170 (ctxt->sax->externalSubset != NULL))
12171 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12172 ctxt->extSubSystem, ctxt->extSubURI);
12173 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000012174 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012175 if (ctxt->instate == XML_PARSER_EOF)
12176 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012177 ctxt->instate = XML_PARSER_PROLOG;
12178 ctxt->checkIndex = 0;
12179#ifdef DEBUG_PUSH
12180 xmlGenericError(xmlGenericErrorContext,
12181 "PP: entering PROLOG\n");
12182#endif
12183 break;
12184 }
12185 case XML_PARSER_COMMENT:
12186 xmlGenericError(xmlGenericErrorContext,
12187 "PP: internal error, state == COMMENT\n");
12188 ctxt->instate = XML_PARSER_CONTENT;
12189#ifdef DEBUG_PUSH
12190 xmlGenericError(xmlGenericErrorContext,
12191 "PP: entering CONTENT\n");
12192#endif
12193 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012194 case XML_PARSER_IGNORE:
12195 xmlGenericError(xmlGenericErrorContext,
12196 "PP: internal error, state == IGNORE");
12197 ctxt->instate = XML_PARSER_DTD;
12198#ifdef DEBUG_PUSH
12199 xmlGenericError(xmlGenericErrorContext,
12200 "PP: entering DTD\n");
12201#endif
12202 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012203 case XML_PARSER_PI:
12204 xmlGenericError(xmlGenericErrorContext,
12205 "PP: internal error, state == PI\n");
12206 ctxt->instate = XML_PARSER_CONTENT;
12207#ifdef DEBUG_PUSH
12208 xmlGenericError(xmlGenericErrorContext,
12209 "PP: entering CONTENT\n");
12210#endif
12211 break;
12212 case XML_PARSER_ENTITY_DECL:
12213 xmlGenericError(xmlGenericErrorContext,
12214 "PP: internal error, state == ENTITY_DECL\n");
12215 ctxt->instate = XML_PARSER_DTD;
12216#ifdef DEBUG_PUSH
12217 xmlGenericError(xmlGenericErrorContext,
12218 "PP: entering DTD\n");
12219#endif
12220 break;
12221 case XML_PARSER_ENTITY_VALUE:
12222 xmlGenericError(xmlGenericErrorContext,
12223 "PP: internal error, state == ENTITY_VALUE\n");
12224 ctxt->instate = XML_PARSER_CONTENT;
12225#ifdef DEBUG_PUSH
12226 xmlGenericError(xmlGenericErrorContext,
12227 "PP: entering DTD\n");
12228#endif
12229 break;
12230 case XML_PARSER_ATTRIBUTE_VALUE:
12231 xmlGenericError(xmlGenericErrorContext,
12232 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12233 ctxt->instate = XML_PARSER_START_TAG;
12234#ifdef DEBUG_PUSH
12235 xmlGenericError(xmlGenericErrorContext,
12236 "PP: entering START_TAG\n");
12237#endif
12238 break;
12239 case XML_PARSER_SYSTEM_LITERAL:
12240 xmlGenericError(xmlGenericErrorContext,
12241 "PP: internal error, state == SYSTEM_LITERAL\n");
12242 ctxt->instate = XML_PARSER_START_TAG;
12243#ifdef DEBUG_PUSH
12244 xmlGenericError(xmlGenericErrorContext,
12245 "PP: entering START_TAG\n");
12246#endif
12247 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012248 case XML_PARSER_PUBLIC_LITERAL:
12249 xmlGenericError(xmlGenericErrorContext,
12250 "PP: internal error, state == PUBLIC_LITERAL\n");
12251 ctxt->instate = XML_PARSER_START_TAG;
12252#ifdef DEBUG_PUSH
12253 xmlGenericError(xmlGenericErrorContext,
12254 "PP: entering START_TAG\n");
12255#endif
12256 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012257 }
12258 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012259done:
Owen Taylor3473f882001-02-23 17:55:21 +000012260#ifdef DEBUG_PUSH
12261 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12262#endif
12263 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012264encoding_error:
12265 {
12266 char buffer[150];
12267
12268 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12269 ctxt->input->cur[0], ctxt->input->cur[1],
12270 ctxt->input->cur[2], ctxt->input->cur[3]);
12271 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12272 "Input is not proper UTF-8, indicate encoding !\n%s",
12273 BAD_CAST buffer, NULL);
12274 }
12275 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012276}
12277
12278/**
Daniel Veillard65686452012-07-19 18:25:01 +080012279 * xmlParseCheckTransition:
12280 * @ctxt: an XML parser context
12281 * @chunk: a char array
12282 * @size: the size in byte of the chunk
12283 *
12284 * Check depending on the current parser state if the chunk given must be
12285 * processed immediately or one need more data to advance on parsing.
12286 *
12287 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12288 */
12289static int
12290xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12291 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12292 return(-1);
12293 if (ctxt->instate == XML_PARSER_START_TAG) {
12294 if (memchr(chunk, '>', size) != NULL)
12295 return(1);
12296 return(0);
12297 }
12298 if (ctxt->progressive == XML_PARSER_COMMENT) {
12299 if (memchr(chunk, '>', size) != NULL)
12300 return(1);
12301 return(0);
12302 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012303 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12304 if (memchr(chunk, '>', size) != NULL)
12305 return(1);
12306 return(0);
12307 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012308 if (ctxt->progressive == XML_PARSER_PI) {
12309 if (memchr(chunk, '>', size) != NULL)
12310 return(1);
12311 return(0);
12312 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012313 if (ctxt->instate == XML_PARSER_END_TAG) {
12314 if (memchr(chunk, '>', size) != NULL)
12315 return(1);
12316 return(0);
12317 }
12318 if ((ctxt->progressive == XML_PARSER_DTD) ||
12319 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012320 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012321 return(1);
12322 return(0);
12323 }
Daniel Veillard65686452012-07-19 18:25:01 +080012324 return(1);
12325}
12326
12327/**
Owen Taylor3473f882001-02-23 17:55:21 +000012328 * xmlParseChunk:
12329 * @ctxt: an XML parser context
12330 * @chunk: an char array
12331 * @size: the size in byte of the chunk
12332 * @terminate: last chunk indicator
12333 *
12334 * Parse a Chunk of memory
12335 *
12336 * Returns zero if no error, the xmlParserErrors otherwise.
12337 */
12338int
12339xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12340 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012341 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012342 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012343 size_t old_avail = 0;
12344 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012345
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012346 if (ctxt == NULL)
12347 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012348 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012349 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012350 if (ctxt->instate == XML_PARSER_EOF)
12351 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012352 if (ctxt->instate == XML_PARSER_START)
12353 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012354 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12355 (chunk[size - 1] == '\r')) {
12356 end_in_lf = 1;
12357 size--;
12358 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012359
12360xmldecl_done:
12361
Owen Taylor3473f882001-02-23 17:55:21 +000012362 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12363 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012364 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12365 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012366 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012367
Daniel Veillard65686452012-07-19 18:25:01 +080012368 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012369 /*
12370 * Specific handling if we autodetected an encoding, we should not
12371 * push more than the first line ... which depend on the encoding
12372 * And only push the rest once the final encoding was detected
12373 */
12374 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12375 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012376 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012377
12378 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12379 BAD_CAST "UTF-16")) ||
12380 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12381 BAD_CAST "UTF16")))
12382 len = 90;
12383 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12384 BAD_CAST "UCS-4")) ||
12385 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12386 BAD_CAST "UCS4")))
12387 len = 180;
12388
12389 if (ctxt->input->buf->rawconsumed < len)
12390 len -= ctxt->input->buf->rawconsumed;
12391
Raul Hudeaba9716a2010-03-15 10:13:29 +010012392 /*
12393 * Change size for reading the initial declaration only
12394 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12395 * will blindly copy extra bytes from memory.
12396 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012397 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012398 remain = size - len;
12399 size = len;
12400 } else {
12401 remain = 0;
12402 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012403 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012404 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012405 if (res < 0) {
12406 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012407 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012408 return (XML_PARSER_EOF);
12409 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012410 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012411#ifdef DEBUG_PUSH
12412 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12413#endif
12414
Owen Taylor3473f882001-02-23 17:55:21 +000012415 } else if (ctxt->instate != XML_PARSER_EOF) {
12416 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12417 xmlParserInputBufferPtr in = ctxt->input->buf;
12418 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12419 (in->raw != NULL)) {
12420 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012421 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12422 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012423
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012424 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012425 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012426 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012427 xmlGenericError(xmlGenericErrorContext,
12428 "xmlParseChunk: encoder error\n");
12429 return(XML_ERR_INVALID_ENCODING);
12430 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012431 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012432 }
12433 }
12434 }
Daniel Veillard65686452012-07-19 18:25:01 +080012435 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012436 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012437 } else {
12438 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12439 avail = xmlBufUse(ctxt->input->buf->buffer);
12440 /*
12441 * Depending on the current state it may not be such
12442 * a good idea to try parsing if there is nothing in the chunk
12443 * which would be worth doing a parser state transition and we
12444 * need to wait for more data
12445 */
12446 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12447 (old_avail == 0) || (avail == 0) ||
12448 (xmlParseCheckTransition(ctxt,
12449 (const char *)&ctxt->input->base[old_avail],
12450 avail - old_avail)))
12451 xmlParseTryOrFinish(ctxt, terminate);
12452 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012453 if (ctxt->instate == XML_PARSER_EOF)
12454 return(ctxt->errNo);
12455
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012456 if ((ctxt->input != NULL) &&
12457 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12458 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12459 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12460 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012461 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012462 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012463 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12464 return(ctxt->errNo);
12465
12466 if (remain != 0) {
12467 chunk += size;
12468 size = remain;
12469 remain = 0;
12470 goto xmldecl_done;
12471 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012472 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12473 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012474 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12475 ctxt->input);
12476 size_t current = ctxt->input->cur - ctxt->input->base;
12477
Daniel Veillarda617e242006-01-09 14:38:44 +000012478 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012479
12480 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12481 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012482 }
Owen Taylor3473f882001-02-23 17:55:21 +000012483 if (terminate) {
12484 /*
12485 * Check for termination
12486 */
Daniel Veillard65686452012-07-19 18:25:01 +080012487 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012488
12489 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012490 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012491 cur_avail = ctxt->input->length -
12492 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012493 else
Daniel Veillard65686452012-07-19 18:25:01 +080012494 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12495 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012496 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012497
Owen Taylor3473f882001-02-23 17:55:21 +000012498 if ((ctxt->instate != XML_PARSER_EOF) &&
12499 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012500 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012501 }
Daniel Veillard65686452012-07-19 18:25:01 +080012502 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012503 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012504 }
Owen Taylor3473f882001-02-23 17:55:21 +000012505 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012506 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012507 ctxt->sax->endDocument(ctxt->userData);
12508 }
12509 ctxt->instate = XML_PARSER_EOF;
12510 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012511 if (ctxt->wellFormed == 0)
12512 return((xmlParserErrors) ctxt->errNo);
12513 else
12514 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012515}
12516
12517/************************************************************************
12518 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012519 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012520 * *
12521 ************************************************************************/
12522
12523/**
Owen Taylor3473f882001-02-23 17:55:21 +000012524 * xmlCreatePushParserCtxt:
12525 * @sax: a SAX handler
12526 * @user_data: The user data returned on SAX callbacks
12527 * @chunk: a pointer to an array of chars
12528 * @size: number of chars in the array
12529 * @filename: an optional file name or URI
12530 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012531 * Create a parser context for using the XML parser in push mode.
12532 * If @buffer and @size are non-NULL, the data is used to detect
12533 * the encoding. The remaining characters will be parsed so they
12534 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012535 * To allow content encoding detection, @size should be >= 4
12536 * The value of @filename is used for fetching external entities
12537 * and error/warning reports.
12538 *
12539 * Returns the new parser context or NULL
12540 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012541
Owen Taylor3473f882001-02-23 17:55:21 +000012542xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012543xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012544 const char *chunk, int size, const char *filename) {
12545 xmlParserCtxtPtr ctxt;
12546 xmlParserInputPtr inputStream;
12547 xmlParserInputBufferPtr buf;
12548 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12549
12550 /*
12551 * plug some encoding conversion routines
12552 */
12553 if ((chunk != NULL) && (size >= 4))
12554 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12555
12556 buf = xmlAllocParserInputBuffer(enc);
12557 if (buf == NULL) return(NULL);
12558
12559 ctxt = xmlNewParserCtxt();
12560 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012561 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012562 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012563 return(NULL);
12564 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012565 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012566 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12567 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012568 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012569 xmlFreeParserInputBuffer(buf);
12570 xmlFreeParserCtxt(ctxt);
12571 return(NULL);
12572 }
Owen Taylor3473f882001-02-23 17:55:21 +000012573 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012574#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012575 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012576#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012577 xmlFree(ctxt->sax);
12578 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12579 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012580 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012581 xmlFreeParserInputBuffer(buf);
12582 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012583 return(NULL);
12584 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012585 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12586 if (sax->initialized == XML_SAX2_MAGIC)
12587 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12588 else
12589 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012590 if (user_data != NULL)
12591 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012592 }
Owen Taylor3473f882001-02-23 17:55:21 +000012593 if (filename == NULL) {
12594 ctxt->directory = NULL;
12595 } else {
12596 ctxt->directory = xmlParserGetDirectory(filename);
12597 }
12598
12599 inputStream = xmlNewInputStream(ctxt);
12600 if (inputStream == NULL) {
12601 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012602 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012603 return(NULL);
12604 }
12605
12606 if (filename == NULL)
12607 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012608 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012609 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012610 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012611 if (inputStream->filename == NULL) {
12612 xmlFreeParserCtxt(ctxt);
12613 xmlFreeParserInputBuffer(buf);
12614 return(NULL);
12615 }
12616 }
Owen Taylor3473f882001-02-23 17:55:21 +000012617 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012618 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012619 inputPush(ctxt, inputStream);
12620
William M. Brack3a1cd212005-02-11 14:35:54 +000012621 /*
12622 * If the caller didn't provide an initial 'chunk' for determining
12623 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12624 * that it can be automatically determined later
12625 */
12626 if ((size == 0) || (chunk == NULL)) {
12627 ctxt->charset = XML_CHAR_ENCODING_NONE;
12628 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012629 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12630 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012631
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012632 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012633
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012634 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012635#ifdef DEBUG_PUSH
12636 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12637#endif
12638 }
12639
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012640 if (enc != XML_CHAR_ENCODING_NONE) {
12641 xmlSwitchEncoding(ctxt, enc);
12642 }
12643
Owen Taylor3473f882001-02-23 17:55:21 +000012644 return(ctxt);
12645}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012646#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012647
12648/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012649 * xmlHaltParser:
12650 * @ctxt: an XML parser context
12651 *
12652 * Blocks further parser processing don't override error
12653 * for internal use
12654 */
12655static void
12656xmlHaltParser(xmlParserCtxtPtr ctxt) {
12657 if (ctxt == NULL)
12658 return;
12659 ctxt->instate = XML_PARSER_EOF;
12660 ctxt->disableSAX = 1;
12661 if (ctxt->input != NULL) {
12662 /*
12663 * in case there was a specific allocation deallocate before
12664 * overriding base
12665 */
12666 if (ctxt->input->free != NULL) {
12667 ctxt->input->free((xmlChar *) ctxt->input->base);
12668 ctxt->input->free = NULL;
12669 }
12670 ctxt->input->cur = BAD_CAST"";
12671 ctxt->input->base = ctxt->input->cur;
12672 }
12673}
12674
12675/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012676 * xmlStopParser:
12677 * @ctxt: an XML parser context
12678 *
12679 * Blocks further parser processing
12680 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012681void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012682xmlStopParser(xmlParserCtxtPtr ctxt) {
12683 if (ctxt == NULL)
12684 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012685 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012686 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012687}
12688
12689/**
Owen Taylor3473f882001-02-23 17:55:21 +000012690 * xmlCreateIOParserCtxt:
12691 * @sax: a SAX handler
12692 * @user_data: The user data returned on SAX callbacks
12693 * @ioread: an I/O read function
12694 * @ioclose: an I/O close function
12695 * @ioctx: an I/O handler
12696 * @enc: the charset encoding if known
12697 *
12698 * Create a parser context for using the XML parser with an existing
12699 * I/O stream
12700 *
12701 * Returns the new parser context or NULL
12702 */
12703xmlParserCtxtPtr
12704xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12705 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12706 void *ioctx, xmlCharEncoding enc) {
12707 xmlParserCtxtPtr ctxt;
12708 xmlParserInputPtr inputStream;
12709 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012710
Daniel Veillard42595322004-11-08 10:52:06 +000012711 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012712
12713 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012714 if (buf == NULL) {
12715 if (ioclose != NULL)
12716 ioclose(ioctx);
12717 return (NULL);
12718 }
Owen Taylor3473f882001-02-23 17:55:21 +000012719
12720 ctxt = xmlNewParserCtxt();
12721 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012722 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012723 return(NULL);
12724 }
12725 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012726#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012727 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012728#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012729 xmlFree(ctxt->sax);
12730 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12731 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012732 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012733 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012734 return(NULL);
12735 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012736 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12737 if (sax->initialized == XML_SAX2_MAGIC)
12738 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12739 else
12740 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012741 if (user_data != NULL)
12742 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012743 }
Owen Taylor3473f882001-02-23 17:55:21 +000012744
12745 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12746 if (inputStream == NULL) {
12747 xmlFreeParserCtxt(ctxt);
12748 return(NULL);
12749 }
12750 inputPush(ctxt, inputStream);
12751
12752 return(ctxt);
12753}
12754
Daniel Veillard4432df22003-09-28 18:58:27 +000012755#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012756/************************************************************************
12757 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012758 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012759 * *
12760 ************************************************************************/
12761
12762/**
12763 * xmlIOParseDTD:
12764 * @sax: the SAX handler block or NULL
12765 * @input: an Input Buffer
12766 * @enc: the charset encoding if known
12767 *
12768 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012769 *
Owen Taylor3473f882001-02-23 17:55:21 +000012770 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012771 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012772 */
12773
12774xmlDtdPtr
12775xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12776 xmlCharEncoding enc) {
12777 xmlDtdPtr ret = NULL;
12778 xmlParserCtxtPtr ctxt;
12779 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012780 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012781
12782 if (input == NULL)
12783 return(NULL);
12784
12785 ctxt = xmlNewParserCtxt();
12786 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012787 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012788 return(NULL);
12789 }
12790
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012791 /* We are loading a DTD */
12792 ctxt->options |= XML_PARSE_DTDLOAD;
12793
Owen Taylor3473f882001-02-23 17:55:21 +000012794 /*
12795 * Set-up the SAX context
12796 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012797 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012798 if (ctxt->sax != NULL)
12799 xmlFree(ctxt->sax);
12800 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012801 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012802 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012803 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012804
12805 /*
12806 * generate a parser input from the I/O handler
12807 */
12808
Daniel Veillard43caefb2003-12-07 19:32:22 +000012809 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012810 if (pinput == NULL) {
12811 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012812 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012813 xmlFreeParserCtxt(ctxt);
12814 return(NULL);
12815 }
12816
12817 /*
12818 * plug some encoding conversion routines here.
12819 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012820 if (xmlPushInput(ctxt, pinput) < 0) {
12821 if (sax != NULL) ctxt->sax = NULL;
12822 xmlFreeParserCtxt(ctxt);
12823 return(NULL);
12824 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012825 if (enc != XML_CHAR_ENCODING_NONE) {
12826 xmlSwitchEncoding(ctxt, enc);
12827 }
Owen Taylor3473f882001-02-23 17:55:21 +000012828
12829 pinput->filename = NULL;
12830 pinput->line = 1;
12831 pinput->col = 1;
12832 pinput->base = ctxt->input->cur;
12833 pinput->cur = ctxt->input->cur;
12834 pinput->free = NULL;
12835
12836 /*
12837 * let's parse that entity knowing it's an external subset.
12838 */
12839 ctxt->inSubset = 2;
12840 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012841 if (ctxt->myDoc == NULL) {
12842 xmlErrMemory(ctxt, "New Doc failed");
12843 return(NULL);
12844 }
12845 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012846 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12847 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012848
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012849 if ((enc == XML_CHAR_ENCODING_NONE) &&
12850 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012851 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012852 * Get the 4 first bytes and decode the charset
12853 * if enc != XML_CHAR_ENCODING_NONE
12854 * plug some encoding conversion routines.
12855 */
12856 start[0] = RAW;
12857 start[1] = NXT(1);
12858 start[2] = NXT(2);
12859 start[3] = NXT(3);
12860 enc = xmlDetectCharEncoding(start, 4);
12861 if (enc != XML_CHAR_ENCODING_NONE) {
12862 xmlSwitchEncoding(ctxt, enc);
12863 }
12864 }
12865
Owen Taylor3473f882001-02-23 17:55:21 +000012866 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12867
12868 if (ctxt->myDoc != NULL) {
12869 if (ctxt->wellFormed) {
12870 ret = ctxt->myDoc->extSubset;
12871 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012872 if (ret != NULL) {
12873 xmlNodePtr tmp;
12874
12875 ret->doc = NULL;
12876 tmp = ret->children;
12877 while (tmp != NULL) {
12878 tmp->doc = NULL;
12879 tmp = tmp->next;
12880 }
12881 }
Owen Taylor3473f882001-02-23 17:55:21 +000012882 } else {
12883 ret = NULL;
12884 }
12885 xmlFreeDoc(ctxt->myDoc);
12886 ctxt->myDoc = NULL;
12887 }
12888 if (sax != NULL) ctxt->sax = NULL;
12889 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012890
Owen Taylor3473f882001-02-23 17:55:21 +000012891 return(ret);
12892}
12893
12894/**
12895 * xmlSAXParseDTD:
12896 * @sax: the SAX handler block
12897 * @ExternalID: a NAME* containing the External ID of the DTD
12898 * @SystemID: a NAME* containing the URL to the DTD
12899 *
12900 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012901 *
Owen Taylor3473f882001-02-23 17:55:21 +000012902 * Returns the resulting xmlDtdPtr or NULL in case of error.
12903 */
12904
12905xmlDtdPtr
12906xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12907 const xmlChar *SystemID) {
12908 xmlDtdPtr ret = NULL;
12909 xmlParserCtxtPtr ctxt;
12910 xmlParserInputPtr input = NULL;
12911 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012912 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012913
12914 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12915
12916 ctxt = xmlNewParserCtxt();
12917 if (ctxt == NULL) {
12918 return(NULL);
12919 }
12920
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012921 /* We are loading a DTD */
12922 ctxt->options |= XML_PARSE_DTDLOAD;
12923
Owen Taylor3473f882001-02-23 17:55:21 +000012924 /*
12925 * Set-up the SAX context
12926 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012927 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012928 if (ctxt->sax != NULL)
12929 xmlFree(ctxt->sax);
12930 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012931 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012932 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012933
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012934 /*
12935 * Canonicalise the system ID
12936 */
12937 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012938 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012939 xmlFreeParserCtxt(ctxt);
12940 return(NULL);
12941 }
Owen Taylor3473f882001-02-23 17:55:21 +000012942
12943 /*
12944 * Ask the Entity resolver to load the damn thing
12945 */
12946
12947 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012948 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12949 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012950 if (input == NULL) {
12951 if (sax != NULL) ctxt->sax = NULL;
12952 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012953 if (systemIdCanonic != NULL)
12954 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012955 return(NULL);
12956 }
12957
12958 /*
12959 * plug some encoding conversion routines here.
12960 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012961 if (xmlPushInput(ctxt, input) < 0) {
12962 if (sax != NULL) ctxt->sax = NULL;
12963 xmlFreeParserCtxt(ctxt);
12964 if (systemIdCanonic != NULL)
12965 xmlFree(systemIdCanonic);
12966 return(NULL);
12967 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012968 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12969 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12970 xmlSwitchEncoding(ctxt, enc);
12971 }
Owen Taylor3473f882001-02-23 17:55:21 +000012972
12973 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012974 input->filename = (char *) systemIdCanonic;
12975 else
12976 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012977 input->line = 1;
12978 input->col = 1;
12979 input->base = ctxt->input->cur;
12980 input->cur = ctxt->input->cur;
12981 input->free = NULL;
12982
12983 /*
12984 * let's parse that entity knowing it's an external subset.
12985 */
12986 ctxt->inSubset = 2;
12987 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012988 if (ctxt->myDoc == NULL) {
12989 xmlErrMemory(ctxt, "New Doc failed");
12990 if (sax != NULL) ctxt->sax = NULL;
12991 xmlFreeParserCtxt(ctxt);
12992 return(NULL);
12993 }
12994 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012995 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12996 ExternalID, SystemID);
12997 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12998
12999 if (ctxt->myDoc != NULL) {
13000 if (ctxt->wellFormed) {
13001 ret = ctxt->myDoc->extSubset;
13002 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000013003 if (ret != NULL) {
13004 xmlNodePtr tmp;
13005
13006 ret->doc = NULL;
13007 tmp = ret->children;
13008 while (tmp != NULL) {
13009 tmp->doc = NULL;
13010 tmp = tmp->next;
13011 }
13012 }
Owen Taylor3473f882001-02-23 17:55:21 +000013013 } else {
13014 ret = NULL;
13015 }
13016 xmlFreeDoc(ctxt->myDoc);
13017 ctxt->myDoc = NULL;
13018 }
13019 if (sax != NULL) ctxt->sax = NULL;
13020 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013021
Owen Taylor3473f882001-02-23 17:55:21 +000013022 return(ret);
13023}
13024
Daniel Veillard4432df22003-09-28 18:58:27 +000013025
Owen Taylor3473f882001-02-23 17:55:21 +000013026/**
13027 * xmlParseDTD:
13028 * @ExternalID: a NAME* containing the External ID of the DTD
13029 * @SystemID: a NAME* containing the URL to the DTD
13030 *
13031 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000013032 *
Owen Taylor3473f882001-02-23 17:55:21 +000013033 * Returns the resulting xmlDtdPtr or NULL in case of error.
13034 */
13035
13036xmlDtdPtr
13037xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
13038 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
13039}
Daniel Veillard4432df22003-09-28 18:58:27 +000013040#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013041
13042/************************************************************************
13043 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013044 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000013045 * *
13046 ************************************************************************/
13047
13048/**
Owen Taylor3473f882001-02-23 17:55:21 +000013049 * xmlParseCtxtExternalEntity:
13050 * @ctx: the existing parsing context
13051 * @URL: the URL for the entity to load
13052 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013053 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013054 *
13055 * Parse an external general entity within an existing parsing context
13056 * An external general parsed entity is well-formed if it matches the
13057 * production labeled extParsedEnt.
13058 *
13059 * [78] extParsedEnt ::= TextDecl? content
13060 *
13061 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062 * the parser error code otherwise
13063 */
13064
13065int
13066xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013067 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000013068 xmlParserCtxtPtr ctxt;
13069 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013070 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013071 xmlSAXHandlerPtr oldsax = NULL;
13072 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013073 xmlChar start[4];
13074 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013075
Daniel Veillardce682bc2004-11-05 17:22:25 +000013076 if (ctx == NULL) return(-1);
13077
Daniel Veillard0161e632008-08-28 15:36:32 +000013078 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
13079 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013080 return(XML_ERR_ENTITY_LOOP);
13081 }
13082
Daniel Veillardcda96922001-08-21 10:56:31 +000013083 if (lst != NULL)
13084 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013085 if ((URL == NULL) && (ID == NULL))
13086 return(-1);
13087 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
13088 return(-1);
13089
Rob Richards798743a2009-06-19 13:54:25 -040013090 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000013091 if (ctxt == NULL) {
13092 return(-1);
13093 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013094
Owen Taylor3473f882001-02-23 17:55:21 +000013095 oldsax = ctxt->sax;
13096 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013097 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013098 newDoc = xmlNewDoc(BAD_CAST "1.0");
13099 if (newDoc == NULL) {
13100 xmlFreeParserCtxt(ctxt);
13101 return(-1);
13102 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013103 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013104 if (ctx->myDoc->dict) {
13105 newDoc->dict = ctx->myDoc->dict;
13106 xmlDictReference(newDoc->dict);
13107 }
Owen Taylor3473f882001-02-23 17:55:21 +000013108 if (ctx->myDoc != NULL) {
13109 newDoc->intSubset = ctx->myDoc->intSubset;
13110 newDoc->extSubset = ctx->myDoc->extSubset;
13111 }
13112 if (ctx->myDoc->URL != NULL) {
13113 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13114 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013115 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13116 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013117 ctxt->sax = oldsax;
13118 xmlFreeParserCtxt(ctxt);
13119 newDoc->intSubset = NULL;
13120 newDoc->extSubset = NULL;
13121 xmlFreeDoc(newDoc);
13122 return(-1);
13123 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013124 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013125 nodePush(ctxt, newDoc->children);
13126 if (ctx->myDoc == NULL) {
13127 ctxt->myDoc = newDoc;
13128 } else {
13129 ctxt->myDoc = ctx->myDoc;
13130 newDoc->children->doc = ctx->myDoc;
13131 }
13132
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013133 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013134 * Get the 4 first bytes and decode the charset
13135 * if enc != XML_CHAR_ENCODING_NONE
13136 * plug some encoding conversion routines.
13137 */
13138 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013139 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13140 start[0] = RAW;
13141 start[1] = NXT(1);
13142 start[2] = NXT(2);
13143 start[3] = NXT(3);
13144 enc = xmlDetectCharEncoding(start, 4);
13145 if (enc != XML_CHAR_ENCODING_NONE) {
13146 xmlSwitchEncoding(ctxt, enc);
13147 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013148 }
13149
Owen Taylor3473f882001-02-23 17:55:21 +000013150 /*
13151 * Parse a possible text declaration first
13152 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013153 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013154 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013155 /*
13156 * An XML-1.0 document can't reference an entity not XML-1.0
13157 */
13158 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13159 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013160 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013161 "Version mismatch between document and entity\n");
13162 }
Owen Taylor3473f882001-02-23 17:55:21 +000013163 }
13164
13165 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080013166 * If the user provided its own SAX callbacks then reuse the
13167 * useData callback field, otherwise the expected setup in a
13168 * DOM builder is to have userData == ctxt
13169 */
13170 if (ctx->userData == ctx)
13171 ctxt->userData = ctxt;
13172 else
13173 ctxt->userData = ctx->userData;
13174
13175 /*
Owen Taylor3473f882001-02-23 17:55:21 +000013176 * Doing validity checking on chunk doesn't make sense
13177 */
13178 ctxt->instate = XML_PARSER_CONTENT;
13179 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013180 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013181 ctxt->loadsubset = ctx->loadsubset;
13182 ctxt->depth = ctx->depth + 1;
13183 ctxt->replaceEntities = ctx->replaceEntities;
13184 if (ctxt->validate) {
13185 ctxt->vctxt.error = ctx->vctxt.error;
13186 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000013187 } else {
13188 ctxt->vctxt.error = NULL;
13189 ctxt->vctxt.warning = NULL;
13190 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000013191 ctxt->vctxt.nodeTab = NULL;
13192 ctxt->vctxt.nodeNr = 0;
13193 ctxt->vctxt.nodeMax = 0;
13194 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013195 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13196 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013197 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13198 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13199 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013200 ctxt->dictNames = ctx->dictNames;
13201 ctxt->attsDefault = ctx->attsDefault;
13202 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000013203 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013204
13205 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013206
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013207 ctx->validate = ctxt->validate;
13208 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013209 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013210 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013211 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013212 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013213 }
13214 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013215 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013216 }
13217
13218 if (!ctxt->wellFormed) {
13219 if (ctxt->errNo == 0)
13220 ret = 1;
13221 else
13222 ret = ctxt->errNo;
13223 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013224 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013225 xmlNodePtr cur;
13226
13227 /*
13228 * Return the newly created nodeset after unlinking it from
13229 * they pseudo parent.
13230 */
13231 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013232 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013233 while (cur != NULL) {
13234 cur->parent = NULL;
13235 cur = cur->next;
13236 }
13237 newDoc->children->children = NULL;
13238 }
13239 ret = 0;
13240 }
13241 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013242 ctxt->dict = NULL;
13243 ctxt->attsDefault = NULL;
13244 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013245 xmlFreeParserCtxt(ctxt);
13246 newDoc->intSubset = NULL;
13247 newDoc->extSubset = NULL;
13248 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013249
Owen Taylor3473f882001-02-23 17:55:21 +000013250 return(ret);
13251}
13252
13253/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013254 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013255 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013256 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013257 * @sax: the SAX handler bloc (possibly NULL)
13258 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13259 * @depth: Used for loop detection, use 0
13260 * @URL: the URL for the entity to load
13261 * @ID: the System ID for the entity to load
13262 * @list: the return value for the set of parsed nodes
13263 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013264 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013265 *
13266 * Returns 0 if the entity is well formed, -1 in case of args problem and
13267 * the parser error code otherwise
13268 */
13269
Daniel Veillard7d515752003-09-26 19:12:37 +000013270static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013271xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13272 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013273 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013274 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013275 xmlParserCtxtPtr ctxt;
13276 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013277 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013278 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013279 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013280 xmlChar start[4];
13281 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013282
Daniel Veillard0161e632008-08-28 15:36:32 +000013283 if (((depth > 40) &&
13284 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13285 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013286 return(XML_ERR_ENTITY_LOOP);
13287 }
13288
Owen Taylor3473f882001-02-23 17:55:21 +000013289 if (list != NULL)
13290 *list = NULL;
13291 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013292 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013293 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013294 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013295
13296
Rob Richards9c0aa472009-03-26 18:10:19 +000013297 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013298 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013299 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013300 if (oldctxt != NULL) {
13301 ctxt->_private = oldctxt->_private;
13302 ctxt->loadsubset = oldctxt->loadsubset;
13303 ctxt->validate = oldctxt->validate;
13304 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013305 ctxt->record_info = oldctxt->record_info;
13306 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13307 ctxt->node_seq.length = oldctxt->node_seq.length;
13308 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013309 } else {
13310 /*
13311 * Doing validity checking on chunk without context
13312 * doesn't make sense
13313 */
13314 ctxt->_private = NULL;
13315 ctxt->validate = 0;
13316 ctxt->external = 2;
13317 ctxt->loadsubset = 0;
13318 }
Owen Taylor3473f882001-02-23 17:55:21 +000013319 if (sax != NULL) {
13320 oldsax = ctxt->sax;
13321 ctxt->sax = sax;
13322 if (user_data != NULL)
13323 ctxt->userData = user_data;
13324 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013325 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013326 newDoc = xmlNewDoc(BAD_CAST "1.0");
13327 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013328 ctxt->node_seq.maximum = 0;
13329 ctxt->node_seq.length = 0;
13330 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013331 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013332 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013333 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013334 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013335 newDoc->intSubset = doc->intSubset;
13336 newDoc->extSubset = doc->extSubset;
13337 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013338 xmlDictReference(newDoc->dict);
13339
Owen Taylor3473f882001-02-23 17:55:21 +000013340 if (doc->URL != NULL) {
13341 newDoc->URL = xmlStrdup(doc->URL);
13342 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013343 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13344 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013345 if (sax != NULL)
13346 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013347 ctxt->node_seq.maximum = 0;
13348 ctxt->node_seq.length = 0;
13349 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013350 xmlFreeParserCtxt(ctxt);
13351 newDoc->intSubset = NULL;
13352 newDoc->extSubset = NULL;
13353 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013354 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013355 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013356 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013357 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013358 ctxt->myDoc = doc;
13359 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013360
Daniel Veillard0161e632008-08-28 15:36:32 +000013361 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013362 * Get the 4 first bytes and decode the charset
13363 * if enc != XML_CHAR_ENCODING_NONE
13364 * plug some encoding conversion routines.
13365 */
13366 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013367 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13368 start[0] = RAW;
13369 start[1] = NXT(1);
13370 start[2] = NXT(2);
13371 start[3] = NXT(3);
13372 enc = xmlDetectCharEncoding(start, 4);
13373 if (enc != XML_CHAR_ENCODING_NONE) {
13374 xmlSwitchEncoding(ctxt, enc);
13375 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013376 }
13377
Owen Taylor3473f882001-02-23 17:55:21 +000013378 /*
13379 * Parse a possible text declaration first
13380 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013381 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013382 xmlParseTextDecl(ctxt);
13383 }
13384
Owen Taylor3473f882001-02-23 17:55:21 +000013385 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013386 ctxt->depth = depth;
13387
13388 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013389
Daniel Veillard561b7f82002-03-20 21:55:57 +000013390 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013391 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013392 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013393 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013394 }
13395 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013396 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013397 }
13398
13399 if (!ctxt->wellFormed) {
13400 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013401 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013402 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013403 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013404 } else {
13405 if (list != NULL) {
13406 xmlNodePtr cur;
13407
13408 /*
13409 * Return the newly created nodeset after unlinking it from
13410 * they pseudo parent.
13411 */
13412 cur = newDoc->children->children;
13413 *list = cur;
13414 while (cur != NULL) {
13415 cur->parent = NULL;
13416 cur = cur->next;
13417 }
13418 newDoc->children->children = NULL;
13419 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013420 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013421 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013422
13423 /*
13424 * Record in the parent context the number of entities replacement
13425 * done when parsing that reference.
13426 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013427 if (oldctxt != NULL)
13428 oldctxt->nbentities += ctxt->nbentities;
13429
Daniel Veillard0161e632008-08-28 15:36:32 +000013430 /*
13431 * Also record the size of the entity parsed
13432 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013433 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013434 oldctxt->sizeentities += ctxt->input->consumed;
13435 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13436 }
13437 /*
13438 * And record the last error if any
13439 */
13440 if (ctxt->lastError.code != XML_ERR_OK)
13441 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13442
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013443 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013444 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013445 if (oldctxt != NULL) {
13446 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13447 oldctxt->node_seq.length = ctxt->node_seq.length;
13448 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13449 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013450 ctxt->node_seq.maximum = 0;
13451 ctxt->node_seq.length = 0;
13452 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013453 xmlFreeParserCtxt(ctxt);
13454 newDoc->intSubset = NULL;
13455 newDoc->extSubset = NULL;
13456 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013457
Owen Taylor3473f882001-02-23 17:55:21 +000013458 return(ret);
13459}
13460
Daniel Veillard81273902003-09-30 00:43:48 +000013461#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013462/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013463 * xmlParseExternalEntity:
13464 * @doc: the document the chunk pertains to
13465 * @sax: the SAX handler bloc (possibly NULL)
13466 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13467 * @depth: Used for loop detection, use 0
13468 * @URL: the URL for the entity to load
13469 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013470 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013471 *
13472 * Parse an external general entity
13473 * An external general parsed entity is well-formed if it matches the
13474 * production labeled extParsedEnt.
13475 *
13476 * [78] extParsedEnt ::= TextDecl? content
13477 *
13478 * Returns 0 if the entity is well formed, -1 in case of args problem and
13479 * the parser error code otherwise
13480 */
13481
13482int
13483xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013484 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013485 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013486 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013487}
13488
13489/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013490 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013491 * @doc: the document the chunk pertains to
13492 * @sax: the SAX handler bloc (possibly NULL)
13493 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13494 * @depth: Used for loop detection, use 0
13495 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013496 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013497 *
13498 * Parse a well-balanced chunk of an XML document
13499 * called by the parser
13500 * The allowed sequence for the Well Balanced Chunk is the one defined by
13501 * the content production in the XML grammar:
13502 *
13503 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13504 *
13505 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13506 * the parser error code otherwise
13507 */
13508
13509int
13510xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013511 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013512 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13513 depth, string, lst, 0 );
13514}
Daniel Veillard81273902003-09-30 00:43:48 +000013515#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013516
13517/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013518 * xmlParseBalancedChunkMemoryInternal:
13519 * @oldctxt: the existing parsing context
13520 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13521 * @user_data: the user data field for the parser context
13522 * @lst: the return value for the set of parsed nodes
13523 *
13524 *
13525 * Parse a well-balanced chunk of an XML document
13526 * called by the parser
13527 * The allowed sequence for the Well Balanced Chunk is the one defined by
13528 * the content production in the XML grammar:
13529 *
13530 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13531 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013532 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13533 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013534 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013535 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013536 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013537 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013538static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013539xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13540 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13541 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013542 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013543 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013544 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013545 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013546 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013547 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013548 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013549#ifdef SAX2
13550 int i;
13551#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013552
Daniel Veillard0161e632008-08-28 15:36:32 +000013553 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13554 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013555 return(XML_ERR_ENTITY_LOOP);
13556 }
13557
13558
13559 if (lst != NULL)
13560 *lst = NULL;
13561 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013562 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013563
13564 size = xmlStrlen(string);
13565
13566 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013567 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013568 if (user_data != NULL)
13569 ctxt->userData = user_data;
13570 else
13571 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013572 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13573 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013574 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13575 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13576 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013577
Daniel Veillard74eaec12009-08-26 15:57:20 +020013578#ifdef SAX2
13579 /* propagate namespaces down the entity */
13580 for (i = 0;i < oldctxt->nsNr;i += 2) {
13581 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13582 }
13583#endif
13584
Daniel Veillard328f48c2002-11-15 15:24:34 +000013585 oldsax = ctxt->sax;
13586 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013587 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013588 ctxt->replaceEntities = oldctxt->replaceEntities;
13589 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013590
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013591 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013592 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013593 newDoc = xmlNewDoc(BAD_CAST "1.0");
13594 if (newDoc == NULL) {
13595 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013596 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013597 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013598 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013599 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013600 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013601 newDoc->dict = ctxt->dict;
13602 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013603 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013604 } else {
13605 ctxt->myDoc = oldctxt->myDoc;
13606 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013607 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013608 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013609 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13610 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013611 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013612 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013613 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013614 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013615 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013616 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013617 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013618 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013619 ctxt->myDoc->children = NULL;
13620 ctxt->myDoc->last = NULL;
13621 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013622 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013623 ctxt->instate = XML_PARSER_CONTENT;
13624 ctxt->depth = oldctxt->depth + 1;
13625
Daniel Veillard328f48c2002-11-15 15:24:34 +000013626 ctxt->validate = 0;
13627 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013628 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13629 /*
13630 * ID/IDREF registration will be done in xmlValidateElement below
13631 */
13632 ctxt->loadsubset |= XML_SKIP_IDS;
13633 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013634 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013635 ctxt->attsDefault = oldctxt->attsDefault;
13636 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013637
Daniel Veillard68e9e742002-11-16 15:35:11 +000013638 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013639 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013640 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013641 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013642 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013643 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013644 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013645 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013646 }
13647
13648 if (!ctxt->wellFormed) {
13649 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013650 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013651 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013652 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013653 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013654 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013655 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013656
William M. Brack7b9154b2003-09-27 19:23:50 +000013657 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013658 xmlNodePtr cur;
13659
13660 /*
13661 * Return the newly created nodeset after unlinking it from
13662 * they pseudo parent.
13663 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013664 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013665 *lst = cur;
13666 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013667#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013668 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13669 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13670 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013671 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13672 oldctxt->myDoc, cur);
13673 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013674#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013675 cur->parent = NULL;
13676 cur = cur->next;
13677 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013678 ctxt->myDoc->children->children = NULL;
13679 }
13680 if (ctxt->myDoc != NULL) {
13681 xmlFreeNode(ctxt->myDoc->children);
13682 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013683 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013684 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013685
13686 /*
13687 * Record in the parent context the number of entities replacement
13688 * done when parsing that reference.
13689 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013690 if (oldctxt != NULL)
13691 oldctxt->nbentities += ctxt->nbentities;
13692
Daniel Veillard0161e632008-08-28 15:36:32 +000013693 /*
13694 * Also record the last error if any
13695 */
13696 if (ctxt->lastError.code != XML_ERR_OK)
13697 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13698
Daniel Veillard328f48c2002-11-15 15:24:34 +000013699 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013700 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013701 ctxt->attsDefault = NULL;
13702 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013703 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013704 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013705 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013706 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013707
Daniel Veillard328f48c2002-11-15 15:24:34 +000013708 return(ret);
13709}
13710
Daniel Veillard29b17482004-08-16 00:39:03 +000013711/**
13712 * xmlParseInNodeContext:
13713 * @node: the context node
13714 * @data: the input string
13715 * @datalen: the input string length in bytes
13716 * @options: a combination of xmlParserOption
13717 * @lst: the return value for the set of parsed nodes
13718 *
13719 * Parse a well-balanced chunk of an XML document
13720 * within the context (DTD, namespaces, etc ...) of the given node.
13721 *
13722 * The allowed sequence for the data is a Well Balanced Chunk defined by
13723 * the content production in the XML grammar:
13724 *
13725 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13726 *
13727 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13728 * error code otherwise
13729 */
13730xmlParserErrors
13731xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13732 int options, xmlNodePtr *lst) {
13733#ifdef SAX2
13734 xmlParserCtxtPtr ctxt;
13735 xmlDocPtr doc = NULL;
13736 xmlNodePtr fake, cur;
13737 int nsnr = 0;
13738
13739 xmlParserErrors ret = XML_ERR_OK;
13740
13741 /*
13742 * check all input parameters, grab the document
13743 */
13744 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13745 return(XML_ERR_INTERNAL_ERROR);
13746 switch (node->type) {
13747 case XML_ELEMENT_NODE:
13748 case XML_ATTRIBUTE_NODE:
13749 case XML_TEXT_NODE:
13750 case XML_CDATA_SECTION_NODE:
13751 case XML_ENTITY_REF_NODE:
13752 case XML_PI_NODE:
13753 case XML_COMMENT_NODE:
13754 case XML_DOCUMENT_NODE:
13755 case XML_HTML_DOCUMENT_NODE:
13756 break;
13757 default:
13758 return(XML_ERR_INTERNAL_ERROR);
13759
13760 }
13761 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13762 (node->type != XML_DOCUMENT_NODE) &&
13763 (node->type != XML_HTML_DOCUMENT_NODE))
13764 node = node->parent;
13765 if (node == NULL)
13766 return(XML_ERR_INTERNAL_ERROR);
13767 if (node->type == XML_ELEMENT_NODE)
13768 doc = node->doc;
13769 else
13770 doc = (xmlDocPtr) node;
13771 if (doc == NULL)
13772 return(XML_ERR_INTERNAL_ERROR);
13773
13774 /*
13775 * allocate a context and set-up everything not related to the
13776 * node position in the tree
13777 */
13778 if (doc->type == XML_DOCUMENT_NODE)
13779 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13780#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013781 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013782 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013783 /*
13784 * When parsing in context, it makes no sense to add implied
13785 * elements like html/body/etc...
13786 */
13787 options |= HTML_PARSE_NOIMPLIED;
13788 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013789#endif
13790 else
13791 return(XML_ERR_INTERNAL_ERROR);
13792
13793 if (ctxt == NULL)
13794 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013795
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013796 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013797 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13798 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13799 * we must wait until the last moment to free the original one.
13800 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013801 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013802 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013803 xmlDictFree(ctxt->dict);
13804 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013805 } else
13806 options |= XML_PARSE_NODICT;
13807
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013808 if (doc->encoding != NULL) {
13809 xmlCharEncodingHandlerPtr hdlr;
13810
13811 if (ctxt->encoding != NULL)
13812 xmlFree((xmlChar *) ctxt->encoding);
13813 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13814
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013815 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013816 if (hdlr != NULL) {
13817 xmlSwitchToEncoding(ctxt, hdlr);
13818 } else {
13819 return(XML_ERR_UNSUPPORTED_ENCODING);
13820 }
13821 }
13822
Daniel Veillard37334572008-07-31 08:20:02 +000013823 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013824 xmlDetectSAX2(ctxt);
13825 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013826 /* parsing in context, i.e. as within existing content */
13827 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013828
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013829 fake = xmlNewComment(NULL);
13830 if (fake == NULL) {
13831 xmlFreeParserCtxt(ctxt);
13832 return(XML_ERR_NO_MEMORY);
13833 }
13834 xmlAddChild(node, fake);
13835
Daniel Veillard29b17482004-08-16 00:39:03 +000013836 if (node->type == XML_ELEMENT_NODE) {
13837 nodePush(ctxt, node);
13838 /*
13839 * initialize the SAX2 namespaces stack
13840 */
13841 cur = node;
13842 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13843 xmlNsPtr ns = cur->nsDef;
13844 const xmlChar *iprefix, *ihref;
13845
13846 while (ns != NULL) {
13847 if (ctxt->dict) {
13848 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13849 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13850 } else {
13851 iprefix = ns->prefix;
13852 ihref = ns->href;
13853 }
13854
13855 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13856 nsPush(ctxt, iprefix, ihref);
13857 nsnr++;
13858 }
13859 ns = ns->next;
13860 }
13861 cur = cur->parent;
13862 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013863 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013864
13865 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13866 /*
13867 * ID/IDREF registration will be done in xmlValidateElement below
13868 */
13869 ctxt->loadsubset |= XML_SKIP_IDS;
13870 }
13871
Daniel Veillard499cc922006-01-18 17:22:35 +000013872#ifdef LIBXML_HTML_ENABLED
13873 if (doc->type == XML_HTML_DOCUMENT_NODE)
13874 __htmlParseContent(ctxt);
13875 else
13876#endif
13877 xmlParseContent(ctxt);
13878
Daniel Veillard29b17482004-08-16 00:39:03 +000013879 nsPop(ctxt, nsnr);
13880 if ((RAW == '<') && (NXT(1) == '/')) {
13881 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13882 } else if (RAW != 0) {
13883 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13884 }
13885 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13886 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13887 ctxt->wellFormed = 0;
13888 }
13889
13890 if (!ctxt->wellFormed) {
13891 if (ctxt->errNo == 0)
13892 ret = XML_ERR_INTERNAL_ERROR;
13893 else
13894 ret = (xmlParserErrors)ctxt->errNo;
13895 } else {
13896 ret = XML_ERR_OK;
13897 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013898
Daniel Veillard29b17482004-08-16 00:39:03 +000013899 /*
13900 * Return the newly created nodeset after unlinking it from
13901 * the pseudo sibling.
13902 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013903
Daniel Veillard29b17482004-08-16 00:39:03 +000013904 cur = fake->next;
13905 fake->next = NULL;
13906 node->last = fake;
13907
13908 if (cur != NULL) {
13909 cur->prev = NULL;
13910 }
13911
13912 *lst = cur;
13913
13914 while (cur != NULL) {
13915 cur->parent = NULL;
13916 cur = cur->next;
13917 }
13918
13919 xmlUnlinkNode(fake);
13920 xmlFreeNode(fake);
13921
13922
13923 if (ret != XML_ERR_OK) {
13924 xmlFreeNodeList(*lst);
13925 *lst = NULL;
13926 }
William M. Brackc3f81342004-10-03 01:22:44 +000013927
William M. Brackb7b54de2004-10-06 16:38:01 +000013928 if (doc->dict != NULL)
13929 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013930 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013931
Daniel Veillard29b17482004-08-16 00:39:03 +000013932 return(ret);
13933#else /* !SAX2 */
13934 return(XML_ERR_INTERNAL_ERROR);
13935#endif
13936}
13937
Daniel Veillard81273902003-09-30 00:43:48 +000013938#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013939/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013940 * xmlParseBalancedChunkMemoryRecover:
13941 * @doc: the document the chunk pertains to
13942 * @sax: the SAX handler bloc (possibly NULL)
13943 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13944 * @depth: Used for loop detection, use 0
13945 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13946 * @lst: the return value for the set of parsed nodes
13947 * @recover: return nodes even if the data is broken (use 0)
13948 *
13949 *
13950 * Parse a well-balanced chunk of an XML document
13951 * called by the parser
13952 * The allowed sequence for the Well Balanced Chunk is the one defined by
13953 * the content production in the XML grammar:
13954 *
13955 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13956 *
13957 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13958 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013959 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013960 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013961 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13962 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013963 */
13964int
13965xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013966 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013967 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013968 xmlParserCtxtPtr ctxt;
13969 xmlDocPtr newDoc;
13970 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013971 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013972 int size;
13973 int ret = 0;
13974
Daniel Veillard0161e632008-08-28 15:36:32 +000013975 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013976 return(XML_ERR_ENTITY_LOOP);
13977 }
13978
13979
Daniel Veillardcda96922001-08-21 10:56:31 +000013980 if (lst != NULL)
13981 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013982 if (string == NULL)
13983 return(-1);
13984
13985 size = xmlStrlen(string);
13986
13987 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13988 if (ctxt == NULL) return(-1);
13989 ctxt->userData = ctxt;
13990 if (sax != NULL) {
13991 oldsax = ctxt->sax;
13992 ctxt->sax = sax;
13993 if (user_data != NULL)
13994 ctxt->userData = user_data;
13995 }
13996 newDoc = xmlNewDoc(BAD_CAST "1.0");
13997 if (newDoc == NULL) {
13998 xmlFreeParserCtxt(ctxt);
13999 return(-1);
14000 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000014001 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014002 if ((doc != NULL) && (doc->dict != NULL)) {
14003 xmlDictFree(ctxt->dict);
14004 ctxt->dict = doc->dict;
14005 xmlDictReference(ctxt->dict);
14006 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
14007 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
14008 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
14009 ctxt->dictNames = 1;
14010 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000014011 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014012 }
Owen Taylor3473f882001-02-23 17:55:21 +000014013 if (doc != NULL) {
14014 newDoc->intSubset = doc->intSubset;
14015 newDoc->extSubset = doc->extSubset;
14016 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014017 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
14018 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000014019 if (sax != NULL)
14020 ctxt->sax = oldsax;
14021 xmlFreeParserCtxt(ctxt);
14022 newDoc->intSubset = NULL;
14023 newDoc->extSubset = NULL;
14024 xmlFreeDoc(newDoc);
14025 return(-1);
14026 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014027 xmlAddChild((xmlNodePtr) newDoc, newRoot);
14028 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000014029 if (doc == NULL) {
14030 ctxt->myDoc = newDoc;
14031 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000014032 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000014033 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000014034 /* Ensure that doc has XML spec namespace */
14035 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
14036 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000014037 }
14038 ctxt->instate = XML_PARSER_CONTENT;
14039 ctxt->depth = depth;
14040
14041 /*
14042 * Doing validity checking on chunk doesn't make sense
14043 */
14044 ctxt->validate = 0;
14045 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014046 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014047
Daniel Veillardb39bc392002-10-26 19:29:51 +000014048 if ( doc != NULL ){
14049 content = doc->children;
14050 doc->children = NULL;
14051 xmlParseContent(ctxt);
14052 doc->children = content;
14053 }
14054 else {
14055 xmlParseContent(ctxt);
14056 }
Owen Taylor3473f882001-02-23 17:55:21 +000014057 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014058 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014059 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014060 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014061 }
14062 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014063 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014064 }
14065
14066 if (!ctxt->wellFormed) {
14067 if (ctxt->errNo == 0)
14068 ret = 1;
14069 else
14070 ret = ctxt->errNo;
14071 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000014072 ret = 0;
14073 }
Daniel Veillard0161e632008-08-28 15:36:32 +000014074
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014075 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
14076 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000014077
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014078 /*
14079 * Return the newly created nodeset after unlinking it from
14080 * they pseudo parent.
14081 */
14082 cur = newDoc->children->children;
14083 *lst = cur;
14084 while (cur != NULL) {
14085 xmlSetTreeDoc(cur, doc);
14086 cur->parent = NULL;
14087 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000014088 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014089 newDoc->children->children = NULL;
14090 }
Daniel Veillard0161e632008-08-28 15:36:32 +000014091
14092 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000014093 ctxt->sax = oldsax;
14094 xmlFreeParserCtxt(ctxt);
14095 newDoc->intSubset = NULL;
14096 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000014097 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014098 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000014099
Owen Taylor3473f882001-02-23 17:55:21 +000014100 return(ret);
14101}
14102
14103/**
14104 * xmlSAXParseEntity:
14105 * @sax: the SAX handler block
14106 * @filename: the filename
14107 *
14108 * parse an XML external entity out of context and build a tree.
14109 * It use the given SAX function block to handle the parsing callback.
14110 * If sax is NULL, fallback to the default DOM tree building routines.
14111 *
14112 * [78] extParsedEnt ::= TextDecl? content
14113 *
14114 * This correspond to a "Well Balanced" chunk
14115 *
14116 * Returns the resulting document tree
14117 */
14118
14119xmlDocPtr
14120xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14121 xmlDocPtr ret;
14122 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014123
14124 ctxt = xmlCreateFileParserCtxt(filename);
14125 if (ctxt == NULL) {
14126 return(NULL);
14127 }
14128 if (sax != NULL) {
14129 if (ctxt->sax != NULL)
14130 xmlFree(ctxt->sax);
14131 ctxt->sax = sax;
14132 ctxt->userData = NULL;
14133 }
14134
Owen Taylor3473f882001-02-23 17:55:21 +000014135 xmlParseExtParsedEnt(ctxt);
14136
14137 if (ctxt->wellFormed)
14138 ret = ctxt->myDoc;
14139 else {
14140 ret = NULL;
14141 xmlFreeDoc(ctxt->myDoc);
14142 ctxt->myDoc = NULL;
14143 }
14144 if (sax != NULL)
14145 ctxt->sax = NULL;
14146 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000014147
Owen Taylor3473f882001-02-23 17:55:21 +000014148 return(ret);
14149}
14150
14151/**
14152 * xmlParseEntity:
14153 * @filename: the filename
14154 *
14155 * parse an XML external entity out of context and build a tree.
14156 *
14157 * [78] extParsedEnt ::= TextDecl? content
14158 *
14159 * This correspond to a "Well Balanced" chunk
14160 *
14161 * Returns the resulting document tree
14162 */
14163
14164xmlDocPtr
14165xmlParseEntity(const char *filename) {
14166 return(xmlSAXParseEntity(NULL, filename));
14167}
Daniel Veillard81273902003-09-30 00:43:48 +000014168#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014169
14170/**
Rob Richards9c0aa472009-03-26 18:10:19 +000014171 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000014172 * @URL: the entity URL
14173 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000014174 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000014175 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000014176 *
14177 * Create a parser context for an external entity
14178 * Automatic support for ZLIB/Compress compressed document is provided
14179 * by default if found at compile-time.
14180 *
14181 * Returns the new parser context or NULL
14182 */
Rob Richards9c0aa472009-03-26 18:10:19 +000014183static xmlParserCtxtPtr
14184xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14185 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000014186 xmlParserCtxtPtr ctxt;
14187 xmlParserInputPtr inputStream;
14188 char *directory = NULL;
14189 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000014190
Owen Taylor3473f882001-02-23 17:55:21 +000014191 ctxt = xmlNewParserCtxt();
14192 if (ctxt == NULL) {
14193 return(NULL);
14194 }
14195
Daniel Veillard48247b42009-07-10 16:12:46 +020014196 if (pctx != NULL) {
14197 ctxt->options = pctx->options;
14198 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000014199 }
14200
Owen Taylor3473f882001-02-23 17:55:21 +000014201 uri = xmlBuildURI(URL, base);
14202
14203 if (uri == NULL) {
14204 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14205 if (inputStream == NULL) {
14206 xmlFreeParserCtxt(ctxt);
14207 return(NULL);
14208 }
14209
14210 inputPush(ctxt, inputStream);
14211
14212 if ((ctxt->directory == NULL) && (directory == NULL))
14213 directory = xmlParserGetDirectory((char *)URL);
14214 if ((ctxt->directory == NULL) && (directory != NULL))
14215 ctxt->directory = directory;
14216 } else {
14217 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14218 if (inputStream == NULL) {
14219 xmlFree(uri);
14220 xmlFreeParserCtxt(ctxt);
14221 return(NULL);
14222 }
14223
14224 inputPush(ctxt, inputStream);
14225
14226 if ((ctxt->directory == NULL) && (directory == NULL))
14227 directory = xmlParserGetDirectory((char *)uri);
14228 if ((ctxt->directory == NULL) && (directory != NULL))
14229 ctxt->directory = directory;
14230 xmlFree(uri);
14231 }
Owen Taylor3473f882001-02-23 17:55:21 +000014232 return(ctxt);
14233}
14234
Rob Richards9c0aa472009-03-26 18:10:19 +000014235/**
14236 * xmlCreateEntityParserCtxt:
14237 * @URL: the entity URL
14238 * @ID: the entity PUBLIC ID
14239 * @base: a possible base for the target URI
14240 *
14241 * Create a parser context for an external entity
14242 * Automatic support for ZLIB/Compress compressed document is provided
14243 * by default if found at compile-time.
14244 *
14245 * Returns the new parser context or NULL
14246 */
14247xmlParserCtxtPtr
14248xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14249 const xmlChar *base) {
14250 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14251
14252}
14253
Owen Taylor3473f882001-02-23 17:55:21 +000014254/************************************************************************
14255 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014256 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014257 * *
14258 ************************************************************************/
14259
14260/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014261 * xmlCreateURLParserCtxt:
14262 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014263 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014264 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014265 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014266 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014267 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014268 *
14269 * Returns the new parser context or NULL
14270 */
14271xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014272xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014273{
14274 xmlParserCtxtPtr ctxt;
14275 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014276 char *directory = NULL;
14277
Owen Taylor3473f882001-02-23 17:55:21 +000014278 ctxt = xmlNewParserCtxt();
14279 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014280 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014281 return(NULL);
14282 }
14283
Daniel Veillarddf292f72005-01-16 19:00:15 +000014284 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014285 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014286 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014287
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014288 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014289 if (inputStream == NULL) {
14290 xmlFreeParserCtxt(ctxt);
14291 return(NULL);
14292 }
14293
Owen Taylor3473f882001-02-23 17:55:21 +000014294 inputPush(ctxt, inputStream);
14295 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014296 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014297 if ((ctxt->directory == NULL) && (directory != NULL))
14298 ctxt->directory = directory;
14299
14300 return(ctxt);
14301}
14302
Daniel Veillard61b93382003-11-03 14:28:31 +000014303/**
14304 * xmlCreateFileParserCtxt:
14305 * @filename: the filename
14306 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014307 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014308 * Automatic support for ZLIB/Compress compressed document is provided
14309 * by default if found at compile-time.
14310 *
14311 * Returns the new parser context or NULL
14312 */
14313xmlParserCtxtPtr
14314xmlCreateFileParserCtxt(const char *filename)
14315{
14316 return(xmlCreateURLParserCtxt(filename, 0));
14317}
14318
Daniel Veillard81273902003-09-30 00:43:48 +000014319#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014320/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014321 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014322 * @sax: the SAX handler block
14323 * @filename: the filename
14324 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14325 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014326 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014327 *
14328 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14329 * compressed document is provided by default if found at compile-time.
14330 * It use the given SAX function block to handle the parsing callback.
14331 * If sax is NULL, fallback to the default DOM tree building routines.
14332 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014333 * User data (void *) is stored within the parser context in the
14334 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014335 *
Owen Taylor3473f882001-02-23 17:55:21 +000014336 * Returns the resulting document tree
14337 */
14338
14339xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014340xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14341 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014342 xmlDocPtr ret;
14343 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014344
Daniel Veillard635ef722001-10-29 11:48:19 +000014345 xmlInitParser();
14346
Owen Taylor3473f882001-02-23 17:55:21 +000014347 ctxt = xmlCreateFileParserCtxt(filename);
14348 if (ctxt == NULL) {
14349 return(NULL);
14350 }
14351 if (sax != NULL) {
14352 if (ctxt->sax != NULL)
14353 xmlFree(ctxt->sax);
14354 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014355 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014356 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014357 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014358 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014359 }
Owen Taylor3473f882001-02-23 17:55:21 +000014360
Daniel Veillard37d2d162008-03-14 10:54:00 +000014361 if (ctxt->directory == NULL)
14362 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014363
Daniel Veillarddad3f682002-11-17 16:47:27 +000014364 ctxt->recovery = recovery;
14365
Owen Taylor3473f882001-02-23 17:55:21 +000014366 xmlParseDocument(ctxt);
14367
William M. Brackc07329e2003-09-08 01:57:30 +000014368 if ((ctxt->wellFormed) || recovery) {
14369 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014370 if (ret != NULL) {
14371 if (ctxt->input->buf->compressed > 0)
14372 ret->compression = 9;
14373 else
14374 ret->compression = ctxt->input->buf->compressed;
14375 }
William M. Brackc07329e2003-09-08 01:57:30 +000014376 }
Owen Taylor3473f882001-02-23 17:55:21 +000014377 else {
14378 ret = NULL;
14379 xmlFreeDoc(ctxt->myDoc);
14380 ctxt->myDoc = NULL;
14381 }
14382 if (sax != NULL)
14383 ctxt->sax = NULL;
14384 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014385
Owen Taylor3473f882001-02-23 17:55:21 +000014386 return(ret);
14387}
14388
14389/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014390 * xmlSAXParseFile:
14391 * @sax: the SAX handler block
14392 * @filename: the filename
14393 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14394 * documents
14395 *
14396 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14397 * compressed document is provided by default if found at compile-time.
14398 * It use the given SAX function block to handle the parsing callback.
14399 * If sax is NULL, fallback to the default DOM tree building routines.
14400 *
14401 * Returns the resulting document tree
14402 */
14403
14404xmlDocPtr
14405xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14406 int recovery) {
14407 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14408}
14409
14410/**
Owen Taylor3473f882001-02-23 17:55:21 +000014411 * xmlRecoverDoc:
14412 * @cur: a pointer to an array of xmlChar
14413 *
14414 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014415 * In the case the document is not Well Formed, a attempt to build a
14416 * tree is tried anyway
14417 *
14418 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014419 */
14420
14421xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014422xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014423 return(xmlSAXParseDoc(NULL, cur, 1));
14424}
14425
14426/**
14427 * xmlParseFile:
14428 * @filename: the filename
14429 *
14430 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14431 * compressed document is provided by default if found at compile-time.
14432 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014433 * Returns the resulting document tree if the file was wellformed,
14434 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014435 */
14436
14437xmlDocPtr
14438xmlParseFile(const char *filename) {
14439 return(xmlSAXParseFile(NULL, filename, 0));
14440}
14441
14442/**
14443 * xmlRecoverFile:
14444 * @filename: the filename
14445 *
14446 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14447 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014448 * In the case the document is not Well Formed, it attempts to build
14449 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014450 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014451 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014452 */
14453
14454xmlDocPtr
14455xmlRecoverFile(const char *filename) {
14456 return(xmlSAXParseFile(NULL, filename, 1));
14457}
14458
14459
14460/**
14461 * xmlSetupParserForBuffer:
14462 * @ctxt: an XML parser context
14463 * @buffer: a xmlChar * buffer
14464 * @filename: a file name
14465 *
14466 * Setup the parser context to parse a new buffer; Clears any prior
14467 * contents from the parser context. The buffer parameter must not be
14468 * NULL, but the filename parameter can be
14469 */
14470void
14471xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14472 const char* filename)
14473{
14474 xmlParserInputPtr input;
14475
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014476 if ((ctxt == NULL) || (buffer == NULL))
14477 return;
14478
Owen Taylor3473f882001-02-23 17:55:21 +000014479 input = xmlNewInputStream(ctxt);
14480 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014481 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014482 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014483 return;
14484 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014485
Owen Taylor3473f882001-02-23 17:55:21 +000014486 xmlClearParserCtxt(ctxt);
14487 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014488 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014489 input->base = buffer;
14490 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014491 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014492 inputPush(ctxt, input);
14493}
14494
14495/**
14496 * xmlSAXUserParseFile:
14497 * @sax: a SAX handler
14498 * @user_data: The user data returned on SAX callbacks
14499 * @filename: a file name
14500 *
14501 * parse an XML file and call the given SAX handler routines.
14502 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014503 *
Owen Taylor3473f882001-02-23 17:55:21 +000014504 * Returns 0 in case of success or a error number otherwise
14505 */
14506int
14507xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14508 const char *filename) {
14509 int ret = 0;
14510 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014511
Owen Taylor3473f882001-02-23 17:55:21 +000014512 ctxt = xmlCreateFileParserCtxt(filename);
14513 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014514 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014515 xmlFree(ctxt->sax);
14516 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014517 xmlDetectSAX2(ctxt);
14518
Owen Taylor3473f882001-02-23 17:55:21 +000014519 if (user_data != NULL)
14520 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014521
Owen Taylor3473f882001-02-23 17:55:21 +000014522 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014523
Owen Taylor3473f882001-02-23 17:55:21 +000014524 if (ctxt->wellFormed)
14525 ret = 0;
14526 else {
14527 if (ctxt->errNo != 0)
14528 ret = ctxt->errNo;
14529 else
14530 ret = -1;
14531 }
14532 if (sax != NULL)
14533 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014534 if (ctxt->myDoc != NULL) {
14535 xmlFreeDoc(ctxt->myDoc);
14536 ctxt->myDoc = NULL;
14537 }
Owen Taylor3473f882001-02-23 17:55:21 +000014538 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014539
Owen Taylor3473f882001-02-23 17:55:21 +000014540 return ret;
14541}
Daniel Veillard81273902003-09-30 00:43:48 +000014542#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014543
14544/************************************************************************
14545 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014546 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014547 * *
14548 ************************************************************************/
14549
14550/**
14551 * xmlCreateMemoryParserCtxt:
14552 * @buffer: a pointer to a char array
14553 * @size: the size of the array
14554 *
14555 * Create a parser context for an XML in-memory document.
14556 *
14557 * Returns the new parser context or NULL
14558 */
14559xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014560xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014561 xmlParserCtxtPtr ctxt;
14562 xmlParserInputPtr input;
14563 xmlParserInputBufferPtr buf;
14564
14565 if (buffer == NULL)
14566 return(NULL);
14567 if (size <= 0)
14568 return(NULL);
14569
14570 ctxt = xmlNewParserCtxt();
14571 if (ctxt == NULL)
14572 return(NULL);
14573
Daniel Veillard53350552003-09-18 13:35:51 +000014574 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014575 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014576 if (buf == NULL) {
14577 xmlFreeParserCtxt(ctxt);
14578 return(NULL);
14579 }
Owen Taylor3473f882001-02-23 17:55:21 +000014580
14581 input = xmlNewInputStream(ctxt);
14582 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014583 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014584 xmlFreeParserCtxt(ctxt);
14585 return(NULL);
14586 }
14587
14588 input->filename = NULL;
14589 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014590 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014591
14592 inputPush(ctxt, input);
14593 return(ctxt);
14594}
14595
Daniel Veillard81273902003-09-30 00:43:48 +000014596#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014597/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014598 * xmlSAXParseMemoryWithData:
14599 * @sax: the SAX handler block
14600 * @buffer: an pointer to a char array
14601 * @size: the size of the array
14602 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14603 * documents
14604 * @data: the userdata
14605 *
14606 * parse an XML in-memory block and use the given SAX function block
14607 * to handle the parsing callback. If sax is NULL, fallback to the default
14608 * DOM tree building routines.
14609 *
14610 * User data (void *) is stored within the parser context in the
14611 * context's _private member, so it is available nearly everywhere in libxml
14612 *
14613 * Returns the resulting document tree
14614 */
14615
14616xmlDocPtr
14617xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14618 int size, int recovery, void *data) {
14619 xmlDocPtr ret;
14620 xmlParserCtxtPtr ctxt;
14621
Daniel Veillardab2a7632009-07-09 08:45:03 +020014622 xmlInitParser();
14623
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014624 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14625 if (ctxt == NULL) return(NULL);
14626 if (sax != NULL) {
14627 if (ctxt->sax != NULL)
14628 xmlFree(ctxt->sax);
14629 ctxt->sax = sax;
14630 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014631 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014632 if (data!=NULL) {
14633 ctxt->_private=data;
14634 }
14635
Daniel Veillardadba5f12003-04-04 16:09:01 +000014636 ctxt->recovery = recovery;
14637
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014638 xmlParseDocument(ctxt);
14639
14640 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14641 else {
14642 ret = NULL;
14643 xmlFreeDoc(ctxt->myDoc);
14644 ctxt->myDoc = NULL;
14645 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014646 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014647 ctxt->sax = NULL;
14648 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014649
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014650 return(ret);
14651}
14652
14653/**
Owen Taylor3473f882001-02-23 17:55:21 +000014654 * xmlSAXParseMemory:
14655 * @sax: the SAX handler block
14656 * @buffer: an pointer to a char array
14657 * @size: the size of the array
14658 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14659 * documents
14660 *
14661 * parse an XML in-memory block and use the given SAX function block
14662 * to handle the parsing callback. If sax is NULL, fallback to the default
14663 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014664 *
Owen Taylor3473f882001-02-23 17:55:21 +000014665 * Returns the resulting document tree
14666 */
14667xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014668xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14669 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014670 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014671}
14672
14673/**
14674 * xmlParseMemory:
14675 * @buffer: an pointer to a char array
14676 * @size: the size of the array
14677 *
14678 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014679 *
Owen Taylor3473f882001-02-23 17:55:21 +000014680 * Returns the resulting document tree
14681 */
14682
Daniel Veillard50822cb2001-07-26 20:05:51 +000014683xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014684 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14685}
14686
14687/**
14688 * xmlRecoverMemory:
14689 * @buffer: an pointer to a char array
14690 * @size: the size of the array
14691 *
14692 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014693 * In the case the document is not Well Formed, an attempt to
14694 * build a tree is tried anyway
14695 *
14696 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014697 */
14698
Daniel Veillard50822cb2001-07-26 20:05:51 +000014699xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014700 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14701}
14702
14703/**
14704 * xmlSAXUserParseMemory:
14705 * @sax: a SAX handler
14706 * @user_data: The user data returned on SAX callbacks
14707 * @buffer: an in-memory XML document input
14708 * @size: the length of the XML document in bytes
14709 *
14710 * A better SAX parsing routine.
14711 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014712 *
Owen Taylor3473f882001-02-23 17:55:21 +000014713 * Returns 0 in case of success or a error number otherwise
14714 */
14715int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014716 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014717 int ret = 0;
14718 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014719
14720 xmlInitParser();
14721
Owen Taylor3473f882001-02-23 17:55:21 +000014722 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14723 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014724 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14725 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014726 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014727 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014728
Daniel Veillard30211a02001-04-26 09:33:18 +000014729 if (user_data != NULL)
14730 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014731
Owen Taylor3473f882001-02-23 17:55:21 +000014732 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014733
Owen Taylor3473f882001-02-23 17:55:21 +000014734 if (ctxt->wellFormed)
14735 ret = 0;
14736 else {
14737 if (ctxt->errNo != 0)
14738 ret = ctxt->errNo;
14739 else
14740 ret = -1;
14741 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014742 if (sax != NULL)
14743 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014744 if (ctxt->myDoc != NULL) {
14745 xmlFreeDoc(ctxt->myDoc);
14746 ctxt->myDoc = NULL;
14747 }
Owen Taylor3473f882001-02-23 17:55:21 +000014748 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014749
Owen Taylor3473f882001-02-23 17:55:21 +000014750 return ret;
14751}
Daniel Veillard81273902003-09-30 00:43:48 +000014752#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014753
14754/**
14755 * xmlCreateDocParserCtxt:
14756 * @cur: a pointer to an array of xmlChar
14757 *
14758 * Creates a parser context for an XML in-memory document.
14759 *
14760 * Returns the new parser context or NULL
14761 */
14762xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014763xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014764 int len;
14765
14766 if (cur == NULL)
14767 return(NULL);
14768 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014769 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014770}
14771
Daniel Veillard81273902003-09-30 00:43:48 +000014772#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014773/**
14774 * xmlSAXParseDoc:
14775 * @sax: the SAX handler block
14776 * @cur: a pointer to an array of xmlChar
14777 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14778 * documents
14779 *
14780 * parse an XML in-memory document and build a tree.
14781 * It use the given SAX function block to handle the parsing callback.
14782 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014783 *
Owen Taylor3473f882001-02-23 17:55:21 +000014784 * Returns the resulting document tree
14785 */
14786
14787xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014788xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014789 xmlDocPtr ret;
14790 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014791 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014792
Daniel Veillard38936062004-11-04 17:45:11 +000014793 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014794
14795
14796 ctxt = xmlCreateDocParserCtxt(cur);
14797 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014798 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014799 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014800 ctxt->sax = sax;
14801 ctxt->userData = NULL;
14802 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014803 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014804
14805 xmlParseDocument(ctxt);
14806 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14807 else {
14808 ret = NULL;
14809 xmlFreeDoc(ctxt->myDoc);
14810 ctxt->myDoc = NULL;
14811 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014812 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014813 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014814 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014815
Owen Taylor3473f882001-02-23 17:55:21 +000014816 return(ret);
14817}
14818
14819/**
14820 * xmlParseDoc:
14821 * @cur: a pointer to an array of xmlChar
14822 *
14823 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014824 *
Owen Taylor3473f882001-02-23 17:55:21 +000014825 * Returns the resulting document tree
14826 */
14827
14828xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014829xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014830 return(xmlSAXParseDoc(NULL, cur, 0));
14831}
Daniel Veillard81273902003-09-30 00:43:48 +000014832#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014833
Daniel Veillard81273902003-09-30 00:43:48 +000014834#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014835/************************************************************************
14836 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014837 * Specific function to keep track of entities references *
14838 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014839 * *
14840 ************************************************************************/
14841
14842static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14843
14844/**
14845 * xmlAddEntityReference:
14846 * @ent : A valid entity
14847 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014848 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014849 *
14850 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14851 */
14852static void
14853xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14854 xmlNodePtr lastNode)
14855{
14856 if (xmlEntityRefFunc != NULL) {
14857 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14858 }
14859}
14860
14861
14862/**
14863 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014864 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014865 *
14866 * Set the function to call call back when a xml reference has been made
14867 */
14868void
14869xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14870{
14871 xmlEntityRefFunc = func;
14872}
Daniel Veillard81273902003-09-30 00:43:48 +000014873#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014874
14875/************************************************************************
14876 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014877 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014878 * *
14879 ************************************************************************/
14880
14881#ifdef LIBXML_XPATH_ENABLED
14882#include <libxml/xpath.h>
14883#endif
14884
Daniel Veillardffa3c742005-07-21 13:24:09 +000014885extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014886static int xmlParserInitialized = 0;
14887
14888/**
14889 * xmlInitParser:
14890 *
14891 * Initialization function for the XML parser.
14892 * This is not reentrant. Call once before processing in case of
14893 * use in multithreaded programs.
14894 */
14895
14896void
14897xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014898 if (xmlParserInitialized != 0)
14899 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014900
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014901#ifdef LIBXML_THREAD_ENABLED
14902 __xmlGlobalInitMutexLock();
14903 if (xmlParserInitialized == 0) {
14904#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014905 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014906 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014907 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14908 (xmlGenericError == NULL))
14909 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014910 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014911 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014912 xmlInitCharEncodingHandlers();
14913 xmlDefaultSAXHandlerInit();
14914 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014915#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014916 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014917#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014918#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014919 htmlInitAutoClose();
14920 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014921#endif
14922#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014923 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014924#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014925 xmlParserInitialized = 1;
14926#ifdef LIBXML_THREAD_ENABLED
14927 }
14928 __xmlGlobalInitMutexUnlock();
14929#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014930}
14931
14932/**
14933 * xmlCleanupParser:
14934 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014935 * This function name is somewhat misleading. It does not clean up
14936 * parser state, it cleans up memory allocated by the library itself.
14937 * It is a cleanup function for the XML library. It tries to reclaim all
14938 * related global memory allocated for the library processing.
14939 * It doesn't deallocate any document related memory. One should
14940 * call xmlCleanupParser() only when the process has finished using
14941 * the library and all XML/HTML documents built with it.
14942 * See also xmlInitParser() which has the opposite function of preparing
14943 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014944 *
14945 * WARNING: if your application is multithreaded or has plugin support
14946 * calling this may crash the application if another thread or
14947 * a plugin is still using libxml2. It's sometimes very hard to
14948 * guess if libxml2 is in use in the application, some libraries
14949 * or plugins may use it without notice. In case of doubt abstain
14950 * from calling this function or do it just before calling exit()
14951 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014952 */
14953
14954void
14955xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014956 if (!xmlParserInitialized)
14957 return;
14958
Owen Taylor3473f882001-02-23 17:55:21 +000014959 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014960#ifdef LIBXML_CATALOG_ENABLED
14961 xmlCatalogCleanup();
14962#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014963 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014964 xmlCleanupInputCallbacks();
14965#ifdef LIBXML_OUTPUT_ENABLED
14966 xmlCleanupOutputCallbacks();
14967#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014968#ifdef LIBXML_SCHEMAS_ENABLED
14969 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014970 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014971#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014972 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014973 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014974 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014975 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014976 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014977}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014978
14979/************************************************************************
14980 * *
14981 * New set (2.6.0) of simpler and more flexible APIs *
14982 * *
14983 ************************************************************************/
14984
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The body is wrapped in do { } while (0) so that "DICT_FREE(x);" is
 * exactly one statement and can be used safely in an unbraced if/else.
 * Invocations must be terminated with a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14996
14997/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014998 * xmlCtxtReset:
14999 * @ctxt: an XML parser context
15000 *
15001 * Reset a parser context
15002 */
15003void
15004xmlCtxtReset(xmlParserCtxtPtr ctxt)
15005{
15006 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015007 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015008
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015009 if (ctxt == NULL)
15010 return;
15011
15012 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015013
15014 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
15015 xmlFreeInputStream(input);
15016 }
15017 ctxt->inputNr = 0;
15018 ctxt->input = NULL;
15019
15020 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000015021 if (ctxt->spaceTab != NULL) {
15022 ctxt->spaceTab[0] = -1;
15023 ctxt->space = &ctxt->spaceTab[0];
15024 } else {
15025 ctxt->space = NULL;
15026 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015027
15028
15029 ctxt->nodeNr = 0;
15030 ctxt->node = NULL;
15031
15032 ctxt->nameNr = 0;
15033 ctxt->name = NULL;
15034
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015035 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015036 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015037 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015038 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015039 DICT_FREE(ctxt->directory);
15040 ctxt->directory = NULL;
15041 DICT_FREE(ctxt->extSubURI);
15042 ctxt->extSubURI = NULL;
15043 DICT_FREE(ctxt->extSubSystem);
15044 ctxt->extSubSystem = NULL;
15045 if (ctxt->myDoc != NULL)
15046 xmlFreeDoc(ctxt->myDoc);
15047 ctxt->myDoc = NULL;
15048
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015049 ctxt->standalone = -1;
15050 ctxt->hasExternalSubset = 0;
15051 ctxt->hasPErefs = 0;
15052 ctxt->html = 0;
15053 ctxt->external = 0;
15054 ctxt->instate = XML_PARSER_START;
15055 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015056
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015057 ctxt->wellFormed = 1;
15058 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000015059 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015060 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000015061#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015062 ctxt->vctxt.userData = ctxt;
15063 ctxt->vctxt.error = xmlParserValidityError;
15064 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000015065#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015066 ctxt->record_info = 0;
15067 ctxt->nbChars = 0;
15068 ctxt->checkIndex = 0;
15069 ctxt->inSubset = 0;
15070 ctxt->errNo = XML_ERR_OK;
15071 ctxt->depth = 0;
15072 ctxt->charset = XML_CHAR_ENCODING_UTF8;
15073 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000015074 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000015075 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080015076 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015077 xmlInitNodeInfoSeq(&ctxt->node_seq);
15078
15079 if (ctxt->attsDefault != NULL) {
15080 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
15081 ctxt->attsDefault = NULL;
15082 }
15083 if (ctxt->attsSpecial != NULL) {
15084 xmlHashFree(ctxt->attsSpecial, NULL);
15085 ctxt->attsSpecial = NULL;
15086 }
15087
Daniel Veillard4432df22003-09-28 18:58:27 +000015088#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015089 if (ctxt->catalogs != NULL)
15090 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000015091#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000015092 if (ctxt->lastError.code != XML_ERR_OK)
15093 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015094}
15095
15096/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015097 * xmlCtxtResetPush:
15098 * @ctxt: an XML parser context
15099 * @chunk: a pointer to an array of chars
15100 * @size: number of chars in the array
15101 * @filename: an optional file name or URI
15102 * @encoding: the document encoding, or NULL
15103 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000015104 * Reset a push parser context
15105 *
15106 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015107 */
15108int
15109xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15110 int size, const char *filename, const char *encoding)
15111{
15112 xmlParserInputPtr inputStream;
15113 xmlParserInputBufferPtr buf;
15114 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15115
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000015116 if (ctxt == NULL)
15117 return(1);
15118
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015119 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15120 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15121
15122 buf = xmlAllocParserInputBuffer(enc);
15123 if (buf == NULL)
15124 return(1);
15125
15126 if (ctxt == NULL) {
15127 xmlFreeParserInputBuffer(buf);
15128 return(1);
15129 }
15130
15131 xmlCtxtReset(ctxt);
15132
15133 if (ctxt->pushTab == NULL) {
15134 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15135 sizeof(xmlChar *));
15136 if (ctxt->pushTab == NULL) {
15137 xmlErrMemory(ctxt, NULL);
15138 xmlFreeParserInputBuffer(buf);
15139 return(1);
15140 }
15141 }
15142
15143 if (filename == NULL) {
15144 ctxt->directory = NULL;
15145 } else {
15146 ctxt->directory = xmlParserGetDirectory(filename);
15147 }
15148
15149 inputStream = xmlNewInputStream(ctxt);
15150 if (inputStream == NULL) {
15151 xmlFreeParserInputBuffer(buf);
15152 return(1);
15153 }
15154
15155 if (filename == NULL)
15156 inputStream->filename = NULL;
15157 else
15158 inputStream->filename = (char *)
15159 xmlCanonicPath((const xmlChar *) filename);
15160 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080015161 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015162
15163 inputPush(ctxt, inputStream);
15164
15165 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15166 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015167 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15168 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015169
15170 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15171
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015172 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015173#ifdef DEBUG_PUSH
15174 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15175#endif
15176 }
15177
15178 if (encoding != NULL) {
15179 xmlCharEncodingHandlerPtr hdlr;
15180
Daniel Veillard37334572008-07-31 08:20:02 +000015181 if (ctxt->encoding != NULL)
15182 xmlFree((xmlChar *) ctxt->encoding);
15183 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15184
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015185 hdlr = xmlFindCharEncodingHandler(encoding);
15186 if (hdlr != NULL) {
15187 xmlSwitchToEncoding(ctxt, hdlr);
15188 } else {
15189 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15190 "Unsupported encoding %s\n", BAD_CAST encoding);
15191 }
15192 } else if (enc != XML_CHAR_ENCODING_NONE) {
15193 xmlSwitchEncoding(ctxt, enc);
15194 }
15195
15196 return(0);
15197}
15198
Daniel Veillard37334572008-07-31 08:20:02 +000015199
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015200/**
Daniel Veillard37334572008-07-31 08:20:02 +000015201 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015202 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015203 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015204 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015205 *
15206 * Applies the options to the parser context
15207 *
15208 * Returns 0 in case of success, the set of unknown or unimplemented options
15209 * in case of error.
15210 */
Daniel Veillard37334572008-07-31 08:20:02 +000015211static int
15212xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015213{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015214 if (ctxt == NULL)
15215 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015216 if (encoding != NULL) {
15217 if (ctxt->encoding != NULL)
15218 xmlFree((xmlChar *) ctxt->encoding);
15219 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15220 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015221 if (options & XML_PARSE_RECOVER) {
15222 ctxt->recovery = 1;
15223 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015224 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015225 } else
15226 ctxt->recovery = 0;
15227 if (options & XML_PARSE_DTDLOAD) {
15228 ctxt->loadsubset = XML_DETECT_IDS;
15229 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015230 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015231 } else
15232 ctxt->loadsubset = 0;
15233 if (options & XML_PARSE_DTDATTR) {
15234 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15235 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015236 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015237 }
15238 if (options & XML_PARSE_NOENT) {
15239 ctxt->replaceEntities = 1;
15240 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15241 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015242 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015243 } else
15244 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015245 if (options & XML_PARSE_PEDANTIC) {
15246 ctxt->pedantic = 1;
15247 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015248 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015249 } else
15250 ctxt->pedantic = 0;
15251 if (options & XML_PARSE_NOBLANKS) {
15252 ctxt->keepBlanks = 0;
15253 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15254 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015255 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015256 } else
15257 ctxt->keepBlanks = 1;
15258 if (options & XML_PARSE_DTDVALID) {
15259 ctxt->validate = 1;
15260 if (options & XML_PARSE_NOWARNING)
15261 ctxt->vctxt.warning = NULL;
15262 if (options & XML_PARSE_NOERROR)
15263 ctxt->vctxt.error = NULL;
15264 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015265 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015266 } else
15267 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015268 if (options & XML_PARSE_NOWARNING) {
15269 ctxt->sax->warning = NULL;
15270 options -= XML_PARSE_NOWARNING;
15271 }
15272 if (options & XML_PARSE_NOERROR) {
15273 ctxt->sax->error = NULL;
15274 ctxt->sax->fatalError = NULL;
15275 options -= XML_PARSE_NOERROR;
15276 }
Daniel Veillard81273902003-09-30 00:43:48 +000015277#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015278 if (options & XML_PARSE_SAX1) {
15279 ctxt->sax->startElement = xmlSAX2StartElement;
15280 ctxt->sax->endElement = xmlSAX2EndElement;
15281 ctxt->sax->startElementNs = NULL;
15282 ctxt->sax->endElementNs = NULL;
15283 ctxt->sax->initialized = 1;
15284 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015285 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015286 }
Daniel Veillard81273902003-09-30 00:43:48 +000015287#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015288 if (options & XML_PARSE_NODICT) {
15289 ctxt->dictNames = 0;
15290 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015291 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015292 } else {
15293 ctxt->dictNames = 1;
15294 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015295 if (options & XML_PARSE_NOCDATA) {
15296 ctxt->sax->cdataBlock = NULL;
15297 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015298 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015299 }
15300 if (options & XML_PARSE_NSCLEAN) {
15301 ctxt->options |= XML_PARSE_NSCLEAN;
15302 options -= XML_PARSE_NSCLEAN;
15303 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015304 if (options & XML_PARSE_NONET) {
15305 ctxt->options |= XML_PARSE_NONET;
15306 options -= XML_PARSE_NONET;
15307 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015308 if (options & XML_PARSE_COMPACT) {
15309 ctxt->options |= XML_PARSE_COMPACT;
15310 options -= XML_PARSE_COMPACT;
15311 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015312 if (options & XML_PARSE_OLD10) {
15313 ctxt->options |= XML_PARSE_OLD10;
15314 options -= XML_PARSE_OLD10;
15315 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015316 if (options & XML_PARSE_NOBASEFIX) {
15317 ctxt->options |= XML_PARSE_NOBASEFIX;
15318 options -= XML_PARSE_NOBASEFIX;
15319 }
15320 if (options & XML_PARSE_HUGE) {
15321 ctxt->options |= XML_PARSE_HUGE;
15322 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015323 if (ctxt->dict != NULL)
15324 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015325 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015326 if (options & XML_PARSE_OLDSAX) {
15327 ctxt->options |= XML_PARSE_OLDSAX;
15328 options -= XML_PARSE_OLDSAX;
15329 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015330 if (options & XML_PARSE_IGNORE_ENC) {
15331 ctxt->options |= XML_PARSE_IGNORE_ENC;
15332 options -= XML_PARSE_IGNORE_ENC;
15333 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015334 if (options & XML_PARSE_BIG_LINES) {
15335 ctxt->options |= XML_PARSE_BIG_LINES;
15336 options -= XML_PARSE_BIG_LINES;
15337 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015338 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015339 return (options);
15340}
15341
15342/**
Daniel Veillard37334572008-07-31 08:20:02 +000015343 * xmlCtxtUseOptions:
15344 * @ctxt: an XML parser context
15345 * @options: a combination of xmlParserOption
15346 *
15347 * Applies the options to the parser context
15348 *
15349 * Returns 0 in case of success, the set of unknown or unimplemented options
15350 * in case of error.
15351 */
15352int
15353xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15354{
15355 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15356}
15357
15358/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015359 * xmlDoRead:
15360 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015361 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015362 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015363 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015364 * @reuse: keep the context for reuse
15365 *
15366 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015367 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015368 * Returns the resulting document tree or NULL
15369 */
15370static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015371xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15372 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015373{
15374 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015375
15376 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015377 if (encoding != NULL) {
15378 xmlCharEncodingHandlerPtr hdlr;
15379
15380 hdlr = xmlFindCharEncodingHandler(encoding);
15381 if (hdlr != NULL)
15382 xmlSwitchToEncoding(ctxt, hdlr);
15383 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015384 if ((URL != NULL) && (ctxt->input != NULL) &&
15385 (ctxt->input->filename == NULL))
15386 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015387 xmlParseDocument(ctxt);
15388 if ((ctxt->wellFormed) || ctxt->recovery)
15389 ret = ctxt->myDoc;
15390 else {
15391 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015392 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015393 xmlFreeDoc(ctxt->myDoc);
15394 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015395 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015396 ctxt->myDoc = NULL;
15397 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015398 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015399 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015400
15401 return (ret);
15402}
15403
15404/**
15405 * xmlReadDoc:
15406 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015407 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015408 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015409 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015410 *
15411 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015412 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015413 * Returns the resulting document tree
15414 */
15415xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015416xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015417{
15418 xmlParserCtxtPtr ctxt;
15419
15420 if (cur == NULL)
15421 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015422 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015423
15424 ctxt = xmlCreateDocParserCtxt(cur);
15425 if (ctxt == NULL)
15426 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015427 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015428}
15429
15430/**
15431 * xmlReadFile:
15432 * @filename: a file or URL
15433 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015434 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015435 *
15436 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015437 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015438 * Returns the resulting document tree
15439 */
15440xmlDocPtr
15441xmlReadFile(const char *filename, const char *encoding, int options)
15442{
15443 xmlParserCtxtPtr ctxt;
15444
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015445 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015446 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015447 if (ctxt == NULL)
15448 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015449 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015450}
15451
15452/**
15453 * xmlReadMemory:
15454 * @buffer: a pointer to a char array
15455 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015456 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015457 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015458 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015459 *
15460 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015461 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015462 * Returns the resulting document tree
15463 */
15464xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015465xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015466{
15467 xmlParserCtxtPtr ctxt;
15468
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015469 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015470 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15471 if (ctxt == NULL)
15472 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015473 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015474}
15475
15476/**
15477 * xmlReadFd:
15478 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015479 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015480 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015481 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015482 *
15483 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015484 * NOTE that the file descriptor will not be closed when the
15485 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015486 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015487 * Returns the resulting document tree
15488 */
15489xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015490xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015491{
15492 xmlParserCtxtPtr ctxt;
15493 xmlParserInputBufferPtr input;
15494 xmlParserInputPtr stream;
15495
15496 if (fd < 0)
15497 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015498 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015499
15500 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15501 if (input == NULL)
15502 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015503 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015504 ctxt = xmlNewParserCtxt();
15505 if (ctxt == NULL) {
15506 xmlFreeParserInputBuffer(input);
15507 return (NULL);
15508 }
15509 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15510 if (stream == NULL) {
15511 xmlFreeParserInputBuffer(input);
15512 xmlFreeParserCtxt(ctxt);
15513 return (NULL);
15514 }
15515 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015516 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015517}
15518
15519/**
15520 * xmlReadIO:
15521 * @ioread: an I/O read function
15522 * @ioclose: an I/O close function
15523 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015524 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015525 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015526 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015527 *
15528 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015529 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015530 * Returns the resulting document tree
15531 */
15532xmlDocPtr
15533xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015534 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015535{
15536 xmlParserCtxtPtr ctxt;
15537 xmlParserInputBufferPtr input;
15538 xmlParserInputPtr stream;
15539
15540 if (ioread == NULL)
15541 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015542 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015543
15544 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15545 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015546 if (input == NULL) {
15547 if (ioclose != NULL)
15548 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015549 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015550 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015551 ctxt = xmlNewParserCtxt();
15552 if (ctxt == NULL) {
15553 xmlFreeParserInputBuffer(input);
15554 return (NULL);
15555 }
15556 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15557 if (stream == NULL) {
15558 xmlFreeParserInputBuffer(input);
15559 xmlFreeParserCtxt(ctxt);
15560 return (NULL);
15561 }
15562 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015563 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015564}
15565
15566/**
15567 * xmlCtxtReadDoc:
15568 * @ctxt: an XML parser context
15569 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015570 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015571 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015572 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015573 *
15574 * parse an XML in-memory document and build a tree.
15575 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015576 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015577 * Returns the resulting document tree
15578 */
15579xmlDocPtr
15580xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015581 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015582{
15583 xmlParserInputPtr stream;
15584
15585 if (cur == NULL)
15586 return (NULL);
15587 if (ctxt == NULL)
15588 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015589 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015590
15591 xmlCtxtReset(ctxt);
15592
15593 stream = xmlNewStringInputStream(ctxt, cur);
15594 if (stream == NULL) {
15595 return (NULL);
15596 }
15597 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015598 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015599}
15600
15601/**
15602 * xmlCtxtReadFile:
15603 * @ctxt: an XML parser context
15604 * @filename: a file or URL
15605 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015606 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015607 *
15608 * parse an XML file from the filesystem or the network.
15609 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015610 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015611 * Returns the resulting document tree
15612 */
15613xmlDocPtr
15614xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15615 const char *encoding, int options)
15616{
15617 xmlParserInputPtr stream;
15618
15619 if (filename == NULL)
15620 return (NULL);
15621 if (ctxt == NULL)
15622 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015623 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015624
15625 xmlCtxtReset(ctxt);
15626
Daniel Veillard29614c72004-11-26 10:47:26 +000015627 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015628 if (stream == NULL) {
15629 return (NULL);
15630 }
15631 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015632 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015633}
15634
15635/**
15636 * xmlCtxtReadMemory:
15637 * @ctxt: an XML parser context
15638 * @buffer: a pointer to a char array
15639 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015640 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015641 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015642 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015643 *
15644 * parse an XML in-memory document and build a tree.
15645 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015646 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015647 * Returns the resulting document tree
15648 */
15649xmlDocPtr
15650xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015651 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015652{
15653 xmlParserInputBufferPtr input;
15654 xmlParserInputPtr stream;
15655
15656 if (ctxt == NULL)
15657 return (NULL);
15658 if (buffer == NULL)
15659 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015660 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015661
15662 xmlCtxtReset(ctxt);
15663
15664 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15665 if (input == NULL) {
15666 return(NULL);
15667 }
15668
15669 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15670 if (stream == NULL) {
15671 xmlFreeParserInputBuffer(input);
15672 return(NULL);
15673 }
15674
15675 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015676 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015677}
15678
15679/**
15680 * xmlCtxtReadFd:
15681 * @ctxt: an XML parser context
15682 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015683 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015684 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015685 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015686 *
15687 * parse an XML from a file descriptor and build a tree.
15688 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015689 * NOTE that the file descriptor will not be closed when the
15690 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015691 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015692 * Returns the resulting document tree
15693 */
15694xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015695xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15696 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015697{
15698 xmlParserInputBufferPtr input;
15699 xmlParserInputPtr stream;
15700
15701 if (fd < 0)
15702 return (NULL);
15703 if (ctxt == NULL)
15704 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015705 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015706
15707 xmlCtxtReset(ctxt);
15708
15709
15710 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15711 if (input == NULL)
15712 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015713 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015714 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15715 if (stream == NULL) {
15716 xmlFreeParserInputBuffer(input);
15717 return (NULL);
15718 }
15719 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015720 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015721}
15722
15723/**
15724 * xmlCtxtReadIO:
15725 * @ctxt: an XML parser context
15726 * @ioread: an I/O read function
15727 * @ioclose: an I/O close function
15728 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015729 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015730 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015731 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015732 *
15733 * parse an XML document from I/O functions and source and build a tree.
15734 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015735 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015736 * Returns the resulting document tree
15737 */
15738xmlDocPtr
15739xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15740 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015741 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015742 const char *encoding, int options)
15743{
15744 xmlParserInputBufferPtr input;
15745 xmlParserInputPtr stream;
15746
15747 if (ioread == NULL)
15748 return (NULL);
15749 if (ctxt == NULL)
15750 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015751 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015752
15753 xmlCtxtReset(ctxt);
15754
15755 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15756 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015757 if (input == NULL) {
15758 if (ioclose != NULL)
15759 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015760 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015761 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015762 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15763 if (stream == NULL) {
15764 xmlFreeParserInputBuffer(input);
15765 return (NULL);
15766 }
15767 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015768 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015769}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015770
15771#define bottom_parser
15772#include "elfgcchack.h"