blob: df2efa55f56688a7a33fa63033076f3e0c5f7c26 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Nick Wellnhofer855c19e2017-06-01 01:04:08 +020046#include <stddef.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000048#include <libxml/threads.h>
49#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000050#include <libxml/tree.h>
51#include <libxml/parser.h>
52#include <libxml/parserInternals.h>
53#include <libxml/valid.h>
54#include <libxml/entities.h>
55#include <libxml/xmlerror.h>
56#include <libxml/encoding.h>
57#include <libxml/xmlIO.h>
58#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000059#ifdef LIBXML_CATALOG_ENABLED
60#include <libxml/catalog.h>
61#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000062#ifdef LIBXML_SCHEMAS_ENABLED
63#include <libxml/xmlschemastypes.h>
64#include <libxml/relaxng.h>
65#endif
Owen Taylor3473f882001-02-23 17:55:21 +000066#ifdef HAVE_CTYPE_H
67#include <ctype.h>
68#endif
69#ifdef HAVE_STDLIB_H
70#include <stdlib.h>
71#endif
72#ifdef HAVE_SYS_STAT_H
73#include <sys/stat.h>
74#endif
75#ifdef HAVE_FCNTL_H
76#include <fcntl.h>
77#endif
78#ifdef HAVE_UNISTD_H
79#include <unistd.h>
80#endif
81#ifdef HAVE_ZLIB_H
82#include <zlib.h>
83#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020084#ifdef HAVE_LZMA_H
85#include <lzma.h>
86#endif
Owen Taylor3473f882001-02-23 17:55:21 +000087
Daniel Veillard768eb3b2012-07-16 14:19:49 +080088#include "buf.h"
89#include "enc.h"
90
Daniel Veillard0161e632008-08-28 15:36:32 +000091static void
92xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
93
Rob Richards9c0aa472009-03-26 18:10:19 +000094static xmlParserCtxtPtr
95xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
96 const xmlChar *base, xmlParserCtxtPtr pctx);
97
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080098static void xmlHaltParser(xmlParserCtxtPtr ctxt);
99
Daniel Veillard0161e632008-08-28 15:36:32 +0000100/************************************************************************
101 * *
102 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
103 * *
104 ************************************************************************/
105
/*
 * XML_PARSER_BIG_ENTITY is the replacement size under which
 * xmlParserEntityCheck() does not bother checking the expansion ratio.
 */
#define XML_PARSER_BIG_ENTITY 1000
/* NOTE(review): XML_PARSER_LOT_ENTITY has no user in this part of the file. */
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in byte of the input indicates that you have
 * an exponential behaviour. A value of 10 correspond to at least 3 entity
 * replacement per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
116
117/*
118 * xmlParserEntityCheck
119 *
120 * Function to check non-linear entity expansion behaviour
121 * This is here to detect and stop exponential linear entity expansion
122 * This is not a limitation of the parser but a safety
123 * boundary feature. It can be disabled with the XML_PARSE_HUGE
124 * parser option.
125 */
126static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800127xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800128 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000129{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800130 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000131
132 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
133 return (0);
134 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
135 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800136
137 /*
138 * This may look absurd but is needed to detect
139 * entities problems
140 */
141 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillardbdd66182016-05-23 12:27:58 +0800142 (ent->content != NULL) && (ent->checked == 0) &&
143 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800144 unsigned long oldnbent = ctxt->nbentities;
145 xmlChar *rep;
146
147 ent->checked = 1;
148
Peter Simons8f30bdf2016-04-15 11:56:55 +0200149 ++ctxt->depth;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800150 rep = xmlStringDecodeEntities(ctxt, ent->content,
151 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +0200152 --ctxt->depth;
Daniel Veillardbdd66182016-05-23 12:27:58 +0800153 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
154 ent->content[0] = 0;
155 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800156
157 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
158 if (rep != NULL) {
159 if (xmlStrchr(rep, '<'))
160 ent->checked |= 1;
161 xmlFree(rep);
162 rep = NULL;
163 }
164 }
Daniel Veillard23f05e02013-02-19 10:21:49 +0800165 if (replacement != 0) {
166 if (replacement < XML_MAX_TEXT_LENGTH)
167 return(0);
168
169 /*
170 * If the volume of entity copy reaches 10 times the
171 * amount of parsed data and over the large text threshold
172 * then that's very likely to be an abuse.
173 */
174 if (ctxt->input != NULL) {
175 consumed = ctxt->input->consumed +
176 (ctxt->input->cur - ctxt->input->base);
177 }
178 consumed += ctxt->sizeentities;
179
180 if (replacement < XML_PARSER_NON_LINEAR * consumed)
181 return(0);
182 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000183 /*
184 * Do the check based on the replacement size of the entity
185 */
186 if (size < XML_PARSER_BIG_ENTITY)
187 return(0);
188
189 /*
190 * A limit on the amount of text data reasonably used
191 */
192 if (ctxt->input != NULL) {
193 consumed = ctxt->input->consumed +
194 (ctxt->input->cur - ctxt->input->base);
195 }
196 consumed += ctxt->sizeentities;
197
198 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
199 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
200 return (0);
201 } else if (ent != NULL) {
202 /*
203 * use the number of parsed entities in the replacement
204 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800205 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000206
207 /*
208 * The amount of data parsed counting entities size only once
209 */
210 if (ctxt->input != NULL) {
211 consumed = ctxt->input->consumed +
212 (ctxt->input->cur - ctxt->input->base);
213 }
214 consumed += ctxt->sizeentities;
215
216 /*
217 * Check the density of entities for the amount of data
218 * knowing an entity reference will take at least 3 bytes
219 */
220 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
221 return (0);
222 } else {
223 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800224 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000225 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800226 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
227 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
228 (ctxt->nbentities <= 10000))
229 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000230 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000231 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
232 return (1);
233}
234
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000244
Daniel Veillard0fb18932003-09-07 09:14:37 +0000245
Daniel Veillard0161e632008-08-28 15:36:32 +0000246
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
262
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is NULL-terminated and never modified, hence the const pointers.
 */

static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
272
Daniel Veillarda07050d2003-10-19 14:46:32 +0000273
Owen Taylor3473f882001-02-23 17:55:21 +0000274/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200275static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
276 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000277
Daniel Veillard7d515752003-09-26 19:12:37 +0000278static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000279xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
280 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000281 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000282 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000283
Daniel Veillard37334572008-07-31 08:20:02 +0000284static int
285xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
286 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000287#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000288static void
289xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
290 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000291#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000292
Daniel Veillard7d515752003-09-26 19:12:37 +0000293static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000294xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
295 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000296
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000297static int
298xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
299
Daniel Veillarde57ec792003-09-10 10:50:59 +0000300/************************************************************************
301 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800302 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 * *
304 ************************************************************************/
305
306/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000307 * xmlErrAttributeDup:
308 * @ctxt: an XML parser context
309 * @prefix: the attribute prefix
310 * @localname: the attribute localname
311 *
312 * Handle a redefinition of attribute error
313 */
314static void
315xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
316 const xmlChar * localname)
317{
Daniel Veillard157fee02003-10-31 10:36:03 +0000318 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
319 (ctxt->instate == XML_PARSER_EOF))
320 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000321 if (ctxt != NULL)
322 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200323
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000324 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000325 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200326 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 (const char *) localname, NULL, NULL, 0, 0,
328 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000329 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000330 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200331 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000332 (const char *) prefix, (const char *) localname,
333 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
334 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000335 if (ctxt != NULL) {
336 ctxt->wellFormed = 0;
337 if (ctxt->recovery == 0)
338 ctxt->disableSAX = 1;
339 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340}
341
342/**
343 * xmlFatalErr:
344 * @ctxt: an XML parser context
345 * @error: the error number
346 * @extra: extra information string
347 *
348 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
349 */
350static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352{
353 const char *errmsg;
354
Daniel Veillard157fee02003-10-31 10:36:03 +0000355 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
356 (ctxt->instate == XML_PARSER_EOF))
357 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 switch (error) {
359 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800360 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000362 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800363 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000365 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800366 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000368 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 errmsg = "internal error";
370 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000371 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800372 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000373 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000374 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800375 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000377 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800378 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000379 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000380 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800381 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000382 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000383 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800384 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000386 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800387 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000389 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800390 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000392 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800393 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000394 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000395 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800396 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000397 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000398 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800399 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000400 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000401 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800402 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000403 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000404 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800405 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000406 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000407 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800408 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000409 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000410 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800411 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000412 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000413 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800414 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000415 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000416 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800417 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000418 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000419 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800420 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000421 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000422 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800423 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000424 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000425 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800426 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800429 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800432 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800435 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 errmsg = "Fragment not allowed";
439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800441 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800444 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800447 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800450 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800453 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000454 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000455 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800456 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800459 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000460 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000461 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000462 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800463 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800466 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000468 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800469 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800472 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000473 break;
474 case XML_ERR_CONDSEC_INVALID_KEYWORD:
475 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800476 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000477 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000478 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800479 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000480 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000481 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800482 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000483 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000484 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800485 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000486 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000487 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800488 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000489 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000490 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800491 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000492 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000493 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800494 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000495 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000496 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800497 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000498 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000499 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800500 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000501 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000502 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800503 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000504 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000505 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800506 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000507 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000508 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800509 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000510 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000511 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800512 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000513 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000514 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800515 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000516 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000517 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800518 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000520 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800521 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000522 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000523 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800524 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000525 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000526 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800527 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000528 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800529 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800530 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800531 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000532#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000533 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800534 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000535 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000536#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000537 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800538 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000539 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000540 if (ctxt != NULL)
541 ctxt->errNo = error;
David Kilzer4472c3a2016-05-13 15:13:17 +0800542 if (info == NULL) {
543 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
544 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
545 errmsg);
546 } else {
547 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
548 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
549 errmsg, info);
550 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000551 if (ctxt != NULL) {
552 ctxt->wellFormed = 0;
553 if (ctxt->recovery == 0)
554 ctxt->disableSAX = 1;
555 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000556}
557
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000558/**
559 * xmlFatalErrMsg:
560 * @ctxt: an XML parser context
561 * @error: the error number
562 * @msg: the error message
563 *
564 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
565 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800566static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000567xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000569{
Daniel Veillard157fee02003-10-31 10:36:03 +0000570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 if (ctxt != NULL)
574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200576 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000577 if (ctxt != NULL) {
578 ctxt->wellFormed = 0;
579 if (ctxt->recovery == 0)
580 ctxt->disableSAX = 1;
581 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582}
583
584/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000585 * xmlWarningMsg:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the error message
589 * @str1: extra data
590 * @str2: extra data
591 *
592 * Handle a warning.
593 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800594static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000595xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
596 const char *msg, const xmlChar *str1, const xmlChar *str2)
597{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000598 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000599
Daniel Veillard157fee02003-10-31 10:36:03 +0000600 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
601 (ctxt->instate == XML_PARSER_EOF))
602 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000603 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
604 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000605 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200606 if (ctxt != NULL) {
607 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000608 (ctxt->sax) ? ctxt->sax->warning : NULL,
609 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000610 ctxt, NULL, XML_FROM_PARSER, error,
611 XML_ERR_WARNING, NULL, 0,
612 (const char *) str1, (const char *) str2, NULL, 0, 0,
613 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200614 } else {
615 __xmlRaiseError(schannel, NULL, NULL,
616 ctxt, NULL, XML_FROM_PARSER, error,
617 XML_ERR_WARNING, NULL, 0,
618 (const char *) str1, (const char *) str2, NULL, 0, 0,
619 msg, (const char *) str1, (const char *) str2);
620 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000621}
622
623/**
624 * xmlValidityError:
625 * @ctxt: an XML parser context
626 * @error: the error number
627 * @msg: the error message
628 * @str1: extra data
629 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000630 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000631 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800632static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000633xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000634 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000635{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000636 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000637
638 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
639 (ctxt->instate == XML_PARSER_EOF))
640 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000641 if (ctxt != NULL) {
642 ctxt->errNo = error;
643 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
644 schannel = ctxt->sax->serror;
645 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200646 if (ctxt != NULL) {
647 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000648 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000649 ctxt, NULL, XML_FROM_DTD, error,
650 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000651 (const char *) str2, NULL, 0, 0,
652 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000653 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200654 } else {
655 __xmlRaiseError(schannel, NULL, NULL,
656 ctxt, NULL, XML_FROM_DTD, error,
657 XML_ERR_ERROR, NULL, 0, (const char *) str1,
658 (const char *) str2, NULL, 0, 0,
659 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000660 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000661}
662
663/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000664 * xmlFatalErrMsgInt:
665 * @ctxt: an XML parser context
666 * @error: the error number
667 * @msg: the error message
668 * @val: an integer value
669 *
670 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
671 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800672static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000673xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000674 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000675{
Daniel Veillard157fee02003-10-31 10:36:03 +0000676 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
677 (ctxt->instate == XML_PARSER_EOF))
678 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000679 if (ctxt != NULL)
680 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000681 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000682 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
683 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000684 if (ctxt != NULL) {
685 ctxt->wellFormed = 0;
686 if (ctxt->recovery == 0)
687 ctxt->disableSAX = 1;
688 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000689}
690
691/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000692 * xmlFatalErrMsgStrIntStr:
693 * @ctxt: an XML parser context
694 * @error: the error number
695 * @msg: the error message
696 * @str1: an string info
697 * @val: an integer value
698 * @str2: an string info
699 *
700 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
701 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800702static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000703xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800704 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000705 const xmlChar *str2)
706{
Daniel Veillard157fee02003-10-31 10:36:03 +0000707 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
708 (ctxt->instate == XML_PARSER_EOF))
709 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000710 if (ctxt != NULL)
711 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000712 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000713 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
714 NULL, 0, (const char *) str1, (const char *) str2,
715 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000716 if (ctxt != NULL) {
717 ctxt->wellFormed = 0;
718 if (ctxt->recovery == 0)
719 ctxt->disableSAX = 1;
720 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000721}
722
723/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000724 * xmlFatalErrMsgStr:
725 * @ctxt: an XML parser context
726 * @error: the error number
727 * @msg: the error message
728 * @val: a string value
729 *
730 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
731 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800732static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000733xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000734 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000735{
Daniel Veillard157fee02003-10-31 10:36:03 +0000736 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
737 (ctxt->instate == XML_PARSER_EOF))
738 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000739 if (ctxt != NULL)
740 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000741 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000742 XML_FROM_PARSER, error, XML_ERR_FATAL,
743 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
744 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000745 if (ctxt != NULL) {
746 ctxt->wellFormed = 0;
747 if (ctxt->recovery == 0)
748 ctxt->disableSAX = 1;
749 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000750}
751
752/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000753 * xmlErrMsgStr:
754 * @ctxt: an XML parser context
755 * @error: the error number
756 * @msg: the error message
757 * @val: a string value
758 *
759 * Handle a non fatal parser error
760 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800761static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000762xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
763 const char *msg, const xmlChar * val)
764{
Daniel Veillard157fee02003-10-31 10:36:03 +0000765 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
766 (ctxt->instate == XML_PARSER_EOF))
767 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000768 if (ctxt != NULL)
769 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000770 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000771 XML_FROM_PARSER, error, XML_ERR_ERROR,
772 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
773 val);
774}
775
776/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000777 * xmlNsErr:
778 * @ctxt: an XML parser context
779 * @error: the error number
780 * @msg: the message
781 * @info1: extra information string
782 * @info2: extra information string
783 *
784 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
785 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800786static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000787xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
788 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000789 const xmlChar * info1, const xmlChar * info2,
790 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000791{
Daniel Veillard157fee02003-10-31 10:36:03 +0000792 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
793 (ctxt->instate == XML_PARSER_EOF))
794 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000795 if (ctxt != NULL)
796 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000797 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000798 XML_ERR_ERROR, NULL, 0, (const char *) info1,
799 (const char *) info2, (const char *) info3, 0, 0, msg,
800 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000801 if (ctxt != NULL)
802 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000803}
804
Daniel Veillard37334572008-07-31 08:20:02 +0000805/**
806 * xmlNsWarn
807 * @ctxt: an XML parser context
808 * @error: the error number
809 * @msg: the message
810 * @info1: extra information string
811 * @info2: extra information string
812 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800813 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000814 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800815static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard37334572008-07-31 08:20:02 +0000816xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
817 const char *msg,
818 const xmlChar * info1, const xmlChar * info2,
819 const xmlChar * info3)
820{
821 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
822 (ctxt->instate == XML_PARSER_EOF))
823 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000824 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
825 XML_ERR_WARNING, NULL, 0, (const char *) info1,
826 (const char *) info2, (const char *) info3, 0, 0, msg,
827 info1, info2, info3);
828}
829
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000830/************************************************************************
831 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800832 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833 * *
834 ************************************************************************/
835
836/**
837 * xmlHasFeature:
838 * @feature: the feature to be examined
839 *
840 * Examines if the library has been compiled with a given feature.
841 *
842 * Returns a non-zero value if the feature exist, otherwise zero.
843 * Returns zero (0) if the feature does not exist or an unknown
844 * unknown feature is requested, non-zero otherwise.
845 */
846int
847xmlHasFeature(xmlFeature feature)
848{
849 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000850 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000851#ifdef LIBXML_THREAD_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000856 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000857#ifdef LIBXML_TREE_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000862 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000863#ifdef LIBXML_OUTPUT_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000868 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000869#ifdef LIBXML_PUSH_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000874 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000875#ifdef LIBXML_READER_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000880 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000881#ifdef LIBXML_PATTERN_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000886 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000887#ifdef LIBXML_WRITER_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000892 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000893#ifdef LIBXML_SAX1_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000898 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000899#ifdef LIBXML_FTP_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000904 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000905#ifdef LIBXML_HTTP_ENABLED
906 return(1);
907#else
908 return(0);
909#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000910 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000911#ifdef LIBXML_VALID_ENABLED
912 return(1);
913#else
914 return(0);
915#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000916 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000917#ifdef LIBXML_HTML_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000922 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000923#ifdef LIBXML_LEGACY_ENABLED
924 return(1);
925#else
926 return(0);
927#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000928 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000929#ifdef LIBXML_C14N_ENABLED
930 return(1);
931#else
932 return(0);
933#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000934 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000935#ifdef LIBXML_CATALOG_ENABLED
936 return(1);
937#else
938 return(0);
939#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000940 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000941#ifdef LIBXML_XPATH_ENABLED
942 return(1);
943#else
944 return(0);
945#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000946 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000947#ifdef LIBXML_XPTR_ENABLED
948 return(1);
949#else
950 return(0);
951#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000952 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000953#ifdef LIBXML_XINCLUDE_ENABLED
954 return(1);
955#else
956 return(0);
957#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000958 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000959#ifdef LIBXML_ICONV_ENABLED
960 return(1);
961#else
962 return(0);
963#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000964 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000965#ifdef LIBXML_ISO8859X_ENABLED
966 return(1);
967#else
968 return(0);
969#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000970 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000971#ifdef LIBXML_UNICODE_ENABLED
972 return(1);
973#else
974 return(0);
975#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000976 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000977#ifdef LIBXML_REGEXP_ENABLED
978 return(1);
979#else
980 return(0);
981#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000982 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000983#ifdef LIBXML_AUTOMATA_ENABLED
984 return(1);
985#else
986 return(0);
987#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000988 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000989#ifdef LIBXML_EXPR_ENABLED
990 return(1);
991#else
992 return(0);
993#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000994 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000995#ifdef LIBXML_SCHEMAS_ENABLED
996 return(1);
997#else
998 return(0);
999#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001000 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001001#ifdef LIBXML_SCHEMATRON_ENABLED
1002 return(1);
1003#else
1004 return(0);
1005#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001006 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001007#ifdef LIBXML_MODULES_ENABLED
1008 return(1);
1009#else
1010 return(0);
1011#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001012 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001013#ifdef LIBXML_DEBUG_ENABLED
1014 return(1);
1015#else
1016 return(0);
1017#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001018 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001019#ifdef DEBUG_MEMORY_LOCATION
1020 return(1);
1021#else
1022 return(0);
1023#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001024 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001025#ifdef LIBXML_DEBUG_RUNTIME
1026 return(1);
1027#else
1028 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001029#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001030 case XML_WITH_ZLIB:
1031#ifdef LIBXML_ZLIB_ENABLED
1032 return(1);
1033#else
1034 return(0);
1035#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001036 case XML_WITH_LZMA:
1037#ifdef LIBXML_LZMA_ENABLED
1038 return(1);
1039#else
1040 return(0);
1041#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001042 case XML_WITH_ICU:
1043#ifdef LIBXML_ICU_ENABLED
1044 return(1);
1045#else
1046 return(0);
1047#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001048 default:
1049 break;
1050 }
1051 return(0);
1052}
1053
1054/************************************************************************
1055 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001056 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001057 * *
1058 ************************************************************************/
1059
1060/**
1061 * xmlDetectSAX2:
1062 * @ctxt: an XML parser context
1063 *
1064 * Do the SAX2 detection and specific intialization
1065 */
1066static void
1067xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1068 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001069#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001070 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1071 ((ctxt->sax->startElementNs != NULL) ||
1072 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001073#else
1074 ctxt->sax2 = 1;
1075#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001076
1077 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1078 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1079 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001080 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1081 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001082 xmlErrMemory(ctxt, NULL);
1083 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001084}
1085
Daniel Veillarde57ec792003-09-10 10:50:59 +00001086typedef struct _xmlDefAttrs xmlDefAttrs;
1087typedef xmlDefAttrs *xmlDefAttrsPtr;
1088struct _xmlDefAttrs {
1089 int nbAttrs; /* number of defaulted attributes on that element */
1090 int maxAttrs; /* the size of the array */
Nick Wellnhofer47496722017-05-31 16:46:39 +02001091#if __STDC_VERSION__ >= 199901L
1092 /* Using a C99 flexible array member avoids UBSan errors. */
1093 const xmlChar *values[]; /* array of localname/prefix/values/external */
1094#else
1095 const xmlChar *values[5];
1096#endif
Daniel Veillarde57ec792003-09-10 10:50:59 +00001097};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001098
1099/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001100 * xmlAttrNormalizeSpace:
1101 * @src: the source string
1102 * @dst: the target string
1103 *
1104 * Normalize the space in non CDATA attribute values:
1105 * If the attribute type is not CDATA, then the XML processor MUST further
1106 * process the normalized attribute value by discarding any leading and
1107 * trailing space (#x20) characters, and by replacing sequences of space
1108 * (#x20) characters by a single space (#x20) character.
1109 * Note that the size of dst need to be at least src, and if one doesn't need
1110 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1111 * passing src as dst is just fine.
1112 *
1113 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1114 * is needed.
1115 */
1116static xmlChar *
1117xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1118{
1119 if ((src == NULL) || (dst == NULL))
1120 return(NULL);
1121
1122 while (*src == 0x20) src++;
1123 while (*src != 0) {
1124 if (*src == 0x20) {
1125 while (*src == 0x20) src++;
1126 if (*src != 0)
1127 *dst++ = 0x20;
1128 } else {
1129 *dst++ = *src++;
1130 }
1131 }
1132 *dst = 0;
1133 if (dst == src)
1134 return(NULL);
1135 return(dst);
1136}
1137
1138/**
1139 * xmlAttrNormalizeSpace2:
1140 * @src: the source string
1141 *
1142 * Normalize the space in non CDATA attribute values, a slightly more complex
1143 * front end to avoid allocation problems when running on attribute values
1144 * coming from the input.
1145 *
1146 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1147 * is needed.
1148 */
1149static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001150xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001151{
1152 int i;
1153 int remove_head = 0;
1154 int need_realloc = 0;
1155 const xmlChar *cur;
1156
1157 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1158 return(NULL);
1159 i = *len;
1160 if (i <= 0)
1161 return(NULL);
1162
1163 cur = src;
1164 while (*cur == 0x20) {
1165 cur++;
1166 remove_head++;
1167 }
1168 while (*cur != 0) {
1169 if (*cur == 0x20) {
1170 cur++;
1171 if ((*cur == 0x20) || (*cur == 0)) {
1172 need_realloc = 1;
1173 break;
1174 }
1175 } else
1176 cur++;
1177 }
1178 if (need_realloc) {
1179 xmlChar *ret;
1180
1181 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1182 if (ret == NULL) {
1183 xmlErrMemory(ctxt, NULL);
1184 return(NULL);
1185 }
1186 xmlAttrNormalizeSpace(ret, ret);
1187 *len = (int) strlen((const char *)ret);
1188 return(ret);
1189 } else if (remove_head) {
1190 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001191 memmove(src, src + remove_head, 1 + *len);
1192 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001193 }
1194 return(NULL);
1195}
1196
1197/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001198 * xmlAddDefAttrs:
1199 * @ctxt: an XML parser context
1200 * @fullname: the element fullname
1201 * @fullattr: the attribute fullname
1202 * @value: the attribute value
1203 *
1204 * Add a defaulted attribute for an element
1205 */
1206static void
1207xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1208 const xmlChar *fullname,
1209 const xmlChar *fullattr,
1210 const xmlChar *value) {
1211 xmlDefAttrsPtr defaults;
1212 int len;
1213 const xmlChar *name;
1214 const xmlChar *prefix;
1215
Daniel Veillard6a31b832008-03-26 14:06:44 +00001216 /*
1217 * Allows to detect attribute redefinitions
1218 */
1219 if (ctxt->attsSpecial != NULL) {
1220 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1221 return;
1222 }
1223
Daniel Veillarde57ec792003-09-10 10:50:59 +00001224 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001225 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001226 if (ctxt->attsDefault == NULL)
1227 goto mem_error;
1228 }
1229
1230 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001231 * split the element name into prefix:localname , the string found
1232 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001233 */
1234 name = xmlSplitQName3(fullname, &len);
1235 if (name == NULL) {
1236 name = xmlDictLookup(ctxt->dict, fullname, -1);
1237 prefix = NULL;
1238 } else {
1239 name = xmlDictLookup(ctxt->dict, name, -1);
1240 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1241 }
1242
1243 /*
1244 * make sure there is some storage
1245 */
1246 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1247 if (defaults == NULL) {
1248 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001249 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001250 if (defaults == NULL)
1251 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001252 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001253 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001254 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1255 defaults, NULL) < 0) {
1256 xmlFree(defaults);
1257 goto mem_error;
1258 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001259 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001260 xmlDefAttrsPtr temp;
1261
1262 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001263 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001264 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001265 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001266 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001267 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001268 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1269 defaults, NULL) < 0) {
1270 xmlFree(defaults);
1271 goto mem_error;
1272 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001273 }
1274
1275 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001276 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001277 * are within the DTD and hen not associated to namespace names.
1278 */
1279 name = xmlSplitQName3(fullattr, &len);
1280 if (name == NULL) {
1281 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1282 prefix = NULL;
1283 } else {
1284 name = xmlDictLookup(ctxt->dict, name, -1);
1285 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1286 }
1287
Daniel Veillardae0765b2008-07-31 19:54:59 +00001288 defaults->values[5 * defaults->nbAttrs] = name;
1289 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001290 /* intern the string and precompute the end */
1291 len = xmlStrlen(value);
1292 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001293 defaults->values[5 * defaults->nbAttrs + 2] = value;
1294 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1295 if (ctxt->external)
1296 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1297 else
1298 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001299 defaults->nbAttrs++;
1300
1301 return;
1302
1303mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001304 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001305 return;
1306}
1307
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001308/**
1309 * xmlAddSpecialAttr:
1310 * @ctxt: an XML parser context
1311 * @fullname: the element fullname
1312 * @fullattr: the attribute fullname
1313 * @type: the attribute type
1314 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001315 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001316 */
1317static void
1318xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1319 const xmlChar *fullname,
1320 const xmlChar *fullattr,
1321 int type)
1322{
1323 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001324 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001325 if (ctxt->attsSpecial == NULL)
1326 goto mem_error;
1327 }
1328
Daniel Veillardac4118d2008-01-11 05:27:32 +00001329 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1330 return;
1331
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001332 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1333 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001334 return;
1335
1336mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001337 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001338 return;
1339}
1340
Daniel Veillard4432df22003-09-28 18:58:27 +00001341/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001342 * xmlCleanSpecialAttrCallback:
1343 *
1344 * Removes CDATA attributes from the special attribute table
1345 */
1346static void
1347xmlCleanSpecialAttrCallback(void *payload, void *data,
1348 const xmlChar *fullname, const xmlChar *fullattr,
1349 const xmlChar *unused ATTRIBUTE_UNUSED) {
1350 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1351
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001352 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001353 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1354 }
1355}
1356
1357/**
1358 * xmlCleanSpecialAttr:
1359 * @ctxt: an XML parser context
1360 *
1361 * Trim the list of attributes defined to remove all those of type
1362 * CDATA as they are not special. This call should be done when finishing
1363 * to parse the DTD and before starting to parse the document root.
1364 */
1365static void
1366xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1367{
1368 if (ctxt->attsSpecial == NULL)
1369 return;
1370
1371 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1372
1373 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1374 xmlHashFree(ctxt->attsSpecial, NULL);
1375 ctxt->attsSpecial = NULL;
1376 }
1377 return;
1378}
1379
1380/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001381 * xmlCheckLanguageID:
1382 * @lang: pointer to the string value
1383 *
1384 * Checks that the value conforms to the LanguageID production:
1385 *
1386 * NOTE: this is somewhat deprecated, those productions were removed from
1387 * the XML Second edition.
1388 *
1389 * [33] LanguageID ::= Langcode ('-' Subcode)*
1390 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1391 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1392 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1393 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1394 * [38] Subcode ::= ([a-z] | [A-Z])+
1395 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001396 * The current REC references the successors of RFC 1766, currently RFC 5646
1397 *
1398 * http://www.rfc-editor.org/rfc/rfc5646.txt
1399 * langtag = language
1400 * ["-" script]
1401 * ["-" region]
1402 * *("-" variant)
1403 * *("-" extension)
1404 * ["-" privateuse]
1405 * language = 2*3ALPHA ; shortest ISO 639 code
1406 * ["-" extlang] ; sometimes followed by
1407 * ; extended language subtags
1408 * / 4ALPHA ; or reserved for future use
1409 * / 5*8ALPHA ; or registered language subtag
1410 *
1411 * extlang = 3ALPHA ; selected ISO 639 codes
1412 * *2("-" 3ALPHA) ; permanently reserved
1413 *
1414 * script = 4ALPHA ; ISO 15924 code
1415 *
1416 * region = 2ALPHA ; ISO 3166-1 code
1417 * / 3DIGIT ; UN M.49 code
1418 *
1419 * variant = 5*8alphanum ; registered variants
1420 * / (DIGIT 3alphanum)
1421 *
1422 * extension = singleton 1*("-" (2*8alphanum))
1423 *
1424 * ; Single alphanumerics
1425 * ; "x" reserved for private use
1426 * singleton = DIGIT ; 0 - 9
1427 * / %x41-57 ; A - W
1428 * / %x59-5A ; Y - Z
1429 * / %x61-77 ; a - w
1430 * / %x79-7A ; y - z
1431 *
1432 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1433 * The parser below doesn't try to cope with extension or privateuse
1434 * that could be added but that's not interoperable anyway
1435 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001436 * Returns 1 if correct 0 otherwise
1437 **/
1438int
1439xmlCheckLanguageID(const xmlChar * lang)
1440{
Daniel Veillard60587d62010-11-04 15:16:27 +01001441 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001442
1443 if (cur == NULL)
1444 return (0);
1445 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001446 ((cur[0] == 'I') && (cur[1] == '-')) ||
1447 ((cur[0] == 'x') && (cur[1] == '-')) ||
1448 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001449 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001450 * Still allow IANA code and user code which were coming
1451 * from the previous version of the XML-1.0 specification
1452 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001453 */
1454 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001455 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001456 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1457 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001458 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001459 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001460 nxt = cur;
1461 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1462 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1463 nxt++;
1464 if (nxt - cur >= 4) {
1465 /*
1466 * Reserved
1467 */
1468 if ((nxt - cur > 8) || (nxt[0] != 0))
1469 return(0);
1470 return(1);
1471 }
1472 if (nxt - cur < 2)
1473 return(0);
1474 /* we got an ISO 639 code */
1475 if (nxt[0] == 0)
1476 return(1);
1477 if (nxt[0] != '-')
1478 return(0);
1479
1480 nxt++;
1481 cur = nxt;
1482 /* now we can have extlang or script or region or variant */
1483 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1484 goto region_m49;
1485
1486 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1487 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1488 nxt++;
1489 if (nxt - cur == 4)
1490 goto script;
1491 if (nxt - cur == 2)
1492 goto region;
1493 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1494 goto variant;
1495 if (nxt - cur != 3)
1496 return(0);
1497 /* we parsed an extlang */
1498 if (nxt[0] == 0)
1499 return(1);
1500 if (nxt[0] != '-')
1501 return(0);
1502
1503 nxt++;
1504 cur = nxt;
1505 /* now we can have script or region or variant */
1506 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1507 goto region_m49;
1508
1509 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1510 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1511 nxt++;
1512 if (nxt - cur == 2)
1513 goto region;
1514 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1515 goto variant;
1516 if (nxt - cur != 4)
1517 return(0);
1518 /* we parsed a script */
1519script:
1520 if (nxt[0] == 0)
1521 return(1);
1522 if (nxt[0] != '-')
1523 return(0);
1524
1525 nxt++;
1526 cur = nxt;
1527 /* now we can have region or variant */
1528 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1529 goto region_m49;
1530
1531 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1532 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1533 nxt++;
1534
1535 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1536 goto variant;
1537 if (nxt - cur != 2)
1538 return(0);
1539 /* we parsed a region */
1540region:
1541 if (nxt[0] == 0)
1542 return(1);
1543 if (nxt[0] != '-')
1544 return(0);
1545
1546 nxt++;
1547 cur = nxt;
1548 /* now we can just have a variant */
1549 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1550 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1551 nxt++;
1552
1553 if ((nxt - cur < 5) || (nxt - cur > 8))
1554 return(0);
1555
1556 /* we parsed a variant */
1557variant:
1558 if (nxt[0] == 0)
1559 return(1);
1560 if (nxt[0] != '-')
1561 return(0);
1562 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001563 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001564
1565region_m49:
1566 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1567 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1568 nxt += 3;
1569 goto region;
1570 }
1571 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001572}
1573
Owen Taylor3473f882001-02-23 17:55:21 +00001574/************************************************************************
1575 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001576 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001577 * *
1578 ************************************************************************/
1579
Daniel Veillard8ed10722009-08-20 19:17:36 +02001580static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1581 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001582
Daniel Veillard0fb18932003-09-07 09:14:37 +00001583#ifdef SAX2
1584/**
1585 * nsPush:
1586 * @ctxt: an XML parser context
1587 * @prefix: the namespace prefix or NULL
1588 * @URL: the namespace name
1589 *
1590 * Pushes a new parser namespace on top of the ns stack
1591 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001592 * Returns -1 in case of error, -2 if the namespace should be discarded
1593 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001594 */
1595static int
1596nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1597{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001598 if (ctxt->options & XML_PARSE_NSCLEAN) {
1599 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001600 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001601 if (ctxt->nsTab[i] == prefix) {
1602 /* in scope */
1603 if (ctxt->nsTab[i + 1] == URL)
1604 return(-2);
1605 /* out of scope keep it */
1606 break;
1607 }
1608 }
1609 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001610 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1611 ctxt->nsMax = 10;
1612 ctxt->nsNr = 0;
1613 ctxt->nsTab = (const xmlChar **)
1614 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1615 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001616 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001617 ctxt->nsMax = 0;
1618 return (-1);
1619 }
1620 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001621 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001622 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001623 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1624 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1625 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001626 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001627 ctxt->nsMax /= 2;
1628 return (-1);
1629 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001630 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001631 }
1632 ctxt->nsTab[ctxt->nsNr++] = prefix;
1633 ctxt->nsTab[ctxt->nsNr++] = URL;
1634 return (ctxt->nsNr);
1635}
1636/**
1637 * nsPop:
1638 * @ctxt: an XML parser context
1639 * @nr: the number to pop
1640 *
1641 * Pops the top @nr parser prefix/namespace from the ns stack
1642 *
1643 * Returns the number of namespaces removed
1644 */
1645static int
1646nsPop(xmlParserCtxtPtr ctxt, int nr)
1647{
1648 int i;
1649
1650 if (ctxt->nsTab == NULL) return(0);
1651 if (ctxt->nsNr < nr) {
1652 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1653 nr = ctxt->nsNr;
1654 }
1655 if (ctxt->nsNr <= 0)
1656 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001657
Daniel Veillard0fb18932003-09-07 09:14:37 +00001658 for (i = 0;i < nr;i++) {
1659 ctxt->nsNr--;
1660 ctxt->nsTab[ctxt->nsNr] = NULL;
1661 }
1662 return(nr);
1663}
1664#endif
1665
1666static int
1667xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1668 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001669 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001670 int maxatts;
1671
1672 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001673 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001674 atts = (const xmlChar **)
1675 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001676 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001677 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001678 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1679 if (attallocs == NULL) goto mem_error;
1680 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001681 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001682 } else if (nr + 5 > ctxt->maxatts) {
1683 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001684 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1685 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001686 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001687 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001688 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1689 (maxatts / 5) * sizeof(int));
1690 if (attallocs == NULL) goto mem_error;
1691 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001692 ctxt->maxatts = maxatts;
1693 }
1694 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001695mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001696 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001697 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001698}
1699
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001700/**
1701 * inputPush:
1702 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001703 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001704 *
1705 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001706 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001707 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001708 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001709int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001710inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1711{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001712 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001713 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001714 if (ctxt->inputNr >= ctxt->inputMax) {
1715 ctxt->inputMax *= 2;
1716 ctxt->inputTab =
1717 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1718 ctxt->inputMax *
1719 sizeof(ctxt->inputTab[0]));
1720 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001721 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001722 xmlFreeInputStream(value);
1723 ctxt->inputMax /= 2;
1724 value = NULL;
1725 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001726 }
1727 }
1728 ctxt->inputTab[ctxt->inputNr] = value;
1729 ctxt->input = value;
1730 return (ctxt->inputNr++);
1731}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001732/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001733 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001734 * @ctxt: an XML parser context
1735 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001736 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001737 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001738 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001739 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001740xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001741inputPop(xmlParserCtxtPtr ctxt)
1742{
1743 xmlParserInputPtr ret;
1744
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001745 if (ctxt == NULL)
1746 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001747 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001748 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001749 ctxt->inputNr--;
1750 if (ctxt->inputNr > 0)
1751 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1752 else
1753 ctxt->input = NULL;
1754 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001755 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001756 return (ret);
1757}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001758/**
1759 * nodePush:
1760 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001761 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001762 *
1763 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001764 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001765 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001766 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001767int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001768nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1769{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001770 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001771 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001772 xmlNodePtr *tmp;
1773
1774 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1775 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001776 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001777 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001778 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001779 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001780 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001781 ctxt->nodeTab = tmp;
1782 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001783 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001784 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1785 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001786 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001787 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001788 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001789 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001790 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001791 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001792 ctxt->nodeTab[ctxt->nodeNr] = value;
1793 ctxt->node = value;
1794 return (ctxt->nodeNr++);
1795}
Daniel Veillard8915c152008-08-26 13:05:34 +00001796
Daniel Veillard1c732d22002-11-30 11:22:59 +00001797/**
1798 * nodePop:
1799 * @ctxt: an XML parser context
1800 *
1801 * Pops the top element node from the node stack
1802 *
1803 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001804 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001805xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001806nodePop(xmlParserCtxtPtr ctxt)
1807{
1808 xmlNodePtr ret;
1809
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001810 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001811 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001812 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001813 ctxt->nodeNr--;
1814 if (ctxt->nodeNr > 0)
1815 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1816 else
1817 ctxt->node = NULL;
1818 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001819 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001820 return (ret);
1821}
Daniel Veillarda2351322004-06-27 12:08:10 +00001822
1823#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001824/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001825 * nameNsPush:
1826 * @ctxt: an XML parser context
1827 * @value: the element name
1828 * @prefix: the element prefix
1829 * @URI: the element namespace name
1830 *
1831 * Pushes a new element name/prefix/URL on top of the name stack
1832 *
1833 * Returns -1 in case of error, the index in the stack otherwise
1834 */
1835static int
1836nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1837 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1838{
1839 if (ctxt->nameNr >= ctxt->nameMax) {
1840 const xmlChar * *tmp;
1841 void **tmp2;
1842 ctxt->nameMax *= 2;
1843 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1844 ctxt->nameMax *
1845 sizeof(ctxt->nameTab[0]));
1846 if (tmp == NULL) {
1847 ctxt->nameMax /= 2;
1848 goto mem_error;
1849 }
1850 ctxt->nameTab = tmp;
1851 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1852 ctxt->nameMax * 3 *
1853 sizeof(ctxt->pushTab[0]));
1854 if (tmp2 == NULL) {
1855 ctxt->nameMax /= 2;
1856 goto mem_error;
1857 }
1858 ctxt->pushTab = tmp2;
1859 }
1860 ctxt->nameTab[ctxt->nameNr] = value;
1861 ctxt->name = value;
1862 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1863 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001864 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001865 return (ctxt->nameNr++);
1866mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001867 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001868 return (-1);
1869}
1870/**
1871 * nameNsPop:
1872 * @ctxt: an XML parser context
1873 *
1874 * Pops the top element/prefix/URI name from the name stack
1875 *
1876 * Returns the name just removed
1877 */
1878static const xmlChar *
1879nameNsPop(xmlParserCtxtPtr ctxt)
1880{
1881 const xmlChar *ret;
1882
1883 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001884 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001885 ctxt->nameNr--;
1886 if (ctxt->nameNr > 0)
1887 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1888 else
1889 ctxt->name = NULL;
1890 ret = ctxt->nameTab[ctxt->nameNr];
1891 ctxt->nameTab[ctxt->nameNr] = NULL;
1892 return (ret);
1893}
Daniel Veillarda2351322004-06-27 12:08:10 +00001894#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001895
1896/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001897 * namePush:
1898 * @ctxt: an XML parser context
1899 * @value: the element name
1900 *
1901 * Pushes a new element name on top of the name stack
1902 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001903 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001904 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001905int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001906namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001907{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001908 if (ctxt == NULL) return (-1);
1909
Daniel Veillard1c732d22002-11-30 11:22:59 +00001910 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001911 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001912 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001913 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001914 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001915 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001916 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001917 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001918 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001919 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001920 }
1921 ctxt->nameTab[ctxt->nameNr] = value;
1922 ctxt->name = value;
1923 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001924mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001925 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001926 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001927}
1928/**
1929 * namePop:
1930 * @ctxt: an XML parser context
1931 *
1932 * Pops the top element name from the name stack
1933 *
1934 * Returns the name just removed
1935 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001936const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001937namePop(xmlParserCtxtPtr ctxt)
1938{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001939 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001940
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001941 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1942 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001943 ctxt->nameNr--;
1944 if (ctxt->nameNr > 0)
1945 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1946 else
1947 ctxt->name = NULL;
1948 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001949 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001950 return (ret);
1951}
Owen Taylor3473f882001-02-23 17:55:21 +00001952
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001953static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001954 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001955 int *tmp;
1956
Owen Taylor3473f882001-02-23 17:55:21 +00001957 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001958 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1959 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1960 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001961 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001962 ctxt->spaceMax /=2;
1963 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001964 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001965 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001966 }
1967 ctxt->spaceTab[ctxt->spaceNr] = val;
1968 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1969 return(ctxt->spaceNr++);
1970}
1971
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001972static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001973 int ret;
1974 if (ctxt->spaceNr <= 0) return(0);
1975 ctxt->spaceNr--;
1976 if (ctxt->spaceNr > 0)
1977 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1978 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001979 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001980 ret = ctxt->spaceTab[ctxt->spaceNr];
1981 ctxt->spaceTab[ctxt->spaceNr] = -1;
1982 return(ret);
1983}
1984
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2019
/*
 * Shorthands on the current input of the parser context; all of them
 * expect a variable named ctxt to be in scope.
 */

/* the xmlChar under the input cursor; RAW and CUR are the same expression */
#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])

/* the xmlChar located val positions after the input cursor */
#define NXT(val) ctxt->input->cur[(val)]

/* the cur and base pointers of the current input */
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base
Owen Taylor3473f882001-02-23 17:55:21 +00002025
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, equal c1 ... cn in order. The comparisons are chained
 * with && so reading stops at the first mismatch. s is evaluated once
 * per byte compared, so it must not have side effects.
 *
 * Every parameter is parenthesized so that the cast to unsigned char *
 * applies to the whole of s even when s is an expression.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2043
/*
 * SKIP(val): advance the input cursor, the column and ctxt->nbChars by
 * val without looking at the bytes skipped (the line counter is not
 * touched). Afterwards a '%' under the cursor triggers
 * xmlParserHandlePEReference(), and a 0 byte with nothing more to read
 * pops the current input. val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
2051
/*
 * SKIPL(val): advance the input cursor by val xmlChars one at a time,
 * keeping line/column up to date: a '\n' bumps the line and resets the
 * column to 1, anything else bumps the column. ctxt->nbChars grows by
 * one per xmlChar. Afterwards a '%' under the cursor triggers
 * xmlParserHandlePEReference(), and a 0 byte with nothing more to read
 * pops the current input.
 *
 * val is re-evaluated on every loop iteration; it is parenthesized so
 * that an expression argument compares as a whole against the counter.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for (skipl = 0; skipl < (val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
2066
Daniel Veillarda880b122003-04-21 21:36:41 +00002067#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002068 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2069 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002070 xmlSHRINK (ctxt);
2071
2072static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2073 xmlParserInputShrink(ctxt->input);
2074 if ((*ctxt->input->cur == 0) &&
2075 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2076 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002077 }
Owen Taylor3473f882001-02-23 17:55:21 +00002078
Daniel Veillarda880b122003-04-21 21:36:41 +00002079#define GROW if ((ctxt->progressive == 0) && \
2080 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002081 xmlGROW (ctxt);
2082
2083static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002084 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2085 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2086
2087 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2088 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002089 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002090 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2091 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002092 xmlHaltParser(ctxt);
2093 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002094 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002095 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002096 if ((ctxt->input->cur > ctxt->input->end) ||
2097 (ctxt->input->cur < ctxt->input->base)) {
2098 xmlHaltParser(ctxt);
2099 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2100 return;
2101 }
Daniel Veillard59df7832010-02-02 10:24:01 +01002102 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002103 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2104 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002105}
Owen Taylor3473f882001-02-23 17:55:21 +00002106
/* skip blanks at the cursor; expands to a call of xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* move to the next character; expands to a call of xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, bumping the column and
 * ctxt->nbChars but never the line counter, then try to read more data
 * if the cursor landed on a 0 byte.
 * The expansion is a brace block, not a do/while statement.
 */
#define NEXT1 {								\
    ctxt->input->cur++;							\
    ctxt->input->col++;							\
    ctxt->nbChars++;							\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * A '\n' bumps the line and resets the column to 1, anything else bumps
 * the column by one. Afterwards a '%' under the cursor triggers
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) != '\n') {					\
        ctxt->input->col++;						\
    } else {								\
        ctxt->input->line++; ctxt->input->col = 1;			\
    }									\
    ctxt->input->cur += l;						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* current character of the context, resp. of the string s; l gets its length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i; a character of length l == 1 is stored directly as one
 * xmlChar, anything else goes through xmlCopyCharMultiByte().
 * The expansion is an unterminated if/else: callers supply the ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l != 1) i += xmlCopyCharMultiByte(&b[i],v);			\
    else b[i++] = (xmlChar) v
Owen Taylor3473f882001-02-23 17:55:21 +00002133
2134/**
2135 * xmlSkipBlankChars:
2136 * @ctxt: the XML parser context
2137 *
2138 * skip all blanks character found at that point in the input streams.
2139 * It pops up finished entities in the process if allowable at that point.
2140 *
2141 * Returns the number of space chars skipped
2142 */
2143
2144int
2145xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002146 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002147
2148 /*
2149 * It's Okay to use CUR/NEXT here since all the blanks are on
2150 * the ASCII range.
2151 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002152 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2153 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002154 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002155 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002156 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002157 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002158 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002159 if (*cur == '\n') {
2160 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002161 } else {
2162 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002163 }
2164 cur++;
2165 res++;
2166 if (*cur == 0) {
2167 ctxt->input->cur = cur;
2168 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2169 cur = ctxt->input->cur;
2170 }
2171 }
2172 ctxt->input->cur = cur;
2173 } else {
2174 int cur;
2175 do {
2176 cur = CUR;
Daniel Veillard3bd6ae12015-11-20 15:06:02 +08002177 while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
2178 (ctxt->instate != XML_PARSER_EOF))) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002179 NEXT;
2180 cur = CUR;
2181 res++;
2182 }
2183 while ((cur == 0) && (ctxt->inputNr > 1) &&
2184 (ctxt->instate != XML_PARSER_COMMENT)) {
2185 xmlPopInput(ctxt);
2186 cur = CUR;
2187 }
2188 /*
2189 * Need to handle support of entities branching here
2190 */
2191 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
Daniel Veillard3bd6ae12015-11-20 15:06:02 +08002192 } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
2193 (ctxt->instate != XML_PARSER_EOF));
Daniel Veillard02141ea2001-04-30 11:46:40 +00002194 }
Owen Taylor3473f882001-02-23 17:55:21 +00002195 return(res);
2196}
2197
2198/************************************************************************
2199 * *
2200 * Commodity functions to handle entities *
2201 * *
2202 ************************************************************************/
2203
2204/**
2205 * xmlPopInput:
2206 * @ctxt: an XML parser context
2207 *
2208 * xmlPopInput: the current input pointed by ctxt->input came to an end
2209 * pop it and return the next char.
2210 *
2211 * Returns the current xmlChar in the parser context
2212 */
2213xmlChar
2214xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002215 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002216 if (xmlParserDebugEntities)
2217 xmlGenericError(xmlGenericErrorContext,
2218 "Popping input %d\n", ctxt->inputNr);
2219 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002220 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002221 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2222 return(xmlPopInput(ctxt));
2223 return(CUR);
2224}
2225
2226/**
2227 * xmlPushInput:
2228 * @ctxt: an XML parser context
2229 * @input: an XML parser input fragment (entity, XML fragment ...).
2230 *
2231 * xmlPushInput: switch to a new input stream which is stacked on top
2232 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002233 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002234 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002235int
Owen Taylor3473f882001-02-23 17:55:21 +00002236xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002237 int ret;
2238 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002239
2240 if (xmlParserDebugEntities) {
2241 if ((ctxt->input != NULL) && (ctxt->input->filename))
2242 xmlGenericError(xmlGenericErrorContext,
2243 "%s(%d): ", ctxt->input->filename,
2244 ctxt->input->line);
2245 xmlGenericError(xmlGenericErrorContext,
2246 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2247 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002248 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002249 if (ctxt->instate == XML_PARSER_EOF)
2250 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002251 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002252 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002253}
2254
2255/**
2256 * xmlParseCharRef:
2257 * @ctxt: an XML parser context
2258 *
2259 * parse Reference declarations
2260 *
2261 * [66] CharRef ::= '&#' [0-9]+ ';' |
2262 * '&#x' [0-9a-fA-F]+ ';'
2263 *
2264 * [ WFC: Legal Character ]
2265 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002266 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002267 *
2268 * Returns the value parsed (as an int), 0 in case of error
2269 */
2270int
2271xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002272 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002273 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002274 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002275
Owen Taylor3473f882001-02-23 17:55:21 +00002276 /*
2277 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2278 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002279 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002280 (NXT(2) == 'x')) {
2281 SKIP(3);
2282 GROW;
2283 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002284 if (count++ > 20) {
2285 count = 0;
2286 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002287 if (ctxt->instate == XML_PARSER_EOF)
2288 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002289 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002290 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002291 val = val * 16 + (CUR - '0');
2292 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2293 val = val * 16 + (CUR - 'a') + 10;
2294 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2295 val = val * 16 + (CUR - 'A') + 10;
2296 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002297 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002298 val = 0;
2299 break;
2300 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002301 if (val > 0x10FFFF)
2302 outofrange = val;
2303
Owen Taylor3473f882001-02-23 17:55:21 +00002304 NEXT;
2305 count++;
2306 }
2307 if (RAW == ';') {
2308 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002309 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002310 ctxt->nbChars ++;
2311 ctxt->input->cur++;
2312 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002313 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002314 SKIP(2);
2315 GROW;
2316 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002317 if (count++ > 20) {
2318 count = 0;
2319 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002320 if (ctxt->instate == XML_PARSER_EOF)
2321 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002322 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002323 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002324 val = val * 10 + (CUR - '0');
2325 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002326 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002327 val = 0;
2328 break;
2329 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002330 if (val > 0x10FFFF)
2331 outofrange = val;
2332
Owen Taylor3473f882001-02-23 17:55:21 +00002333 NEXT;
2334 count++;
2335 }
2336 if (RAW == ';') {
2337 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002338 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002339 ctxt->nbChars ++;
2340 ctxt->input->cur++;
2341 }
2342 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002343 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002344 }
2345
2346 /*
2347 * [ WFC: Legal Character ]
2348 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002349 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002350 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002351 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002352 return(val);
2353 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002354 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2355 "xmlParseCharRef: invalid xmlChar value %d\n",
2356 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002357 }
2358 return(0);
2359}
2360
2361/**
2362 * xmlParseStringCharRef:
2363 * @ctxt: an XML parser context
2364 * @str: a pointer to an index in the string
2365 *
2366 * parse Reference declarations, variant parsing from a string rather
2367 * than an an input flow.
2368 *
2369 * [66] CharRef ::= '&#' [0-9]+ ';' |
2370 * '&#x' [0-9a-fA-F]+ ';'
2371 *
2372 * [ WFC: Legal Character ]
2373 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002374 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002375 *
2376 * Returns the value parsed (as an int), 0 in case of error, str will be
2377 * updated to the current value of the index
2378 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002379static int
Owen Taylor3473f882001-02-23 17:55:21 +00002380xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2381 const xmlChar *ptr;
2382 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002383 unsigned int val = 0;
2384 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002385
2386 if ((str == NULL) || (*str == NULL)) return(0);
2387 ptr = *str;
2388 cur = *ptr;
2389 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2390 ptr += 3;
2391 cur = *ptr;
2392 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002393 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002394 val = val * 16 + (cur - '0');
2395 else if ((cur >= 'a') && (cur <= 'f'))
2396 val = val * 16 + (cur - 'a') + 10;
2397 else if ((cur >= 'A') && (cur <= 'F'))
2398 val = val * 16 + (cur - 'A') + 10;
2399 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002400 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002401 val = 0;
2402 break;
2403 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002404 if (val > 0x10FFFF)
2405 outofrange = val;
2406
Owen Taylor3473f882001-02-23 17:55:21 +00002407 ptr++;
2408 cur = *ptr;
2409 }
2410 if (cur == ';')
2411 ptr++;
2412 } else if ((cur == '&') && (ptr[1] == '#')){
2413 ptr += 2;
2414 cur = *ptr;
2415 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002416 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002417 val = val * 10 + (cur - '0');
2418 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002419 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002420 val = 0;
2421 break;
2422 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002423 if (val > 0x10FFFF)
2424 outofrange = val;
2425
Owen Taylor3473f882001-02-23 17:55:21 +00002426 ptr++;
2427 cur = *ptr;
2428 }
2429 if (cur == ';')
2430 ptr++;
2431 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002432 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002433 return(0);
2434 }
2435 *str = ptr;
2436
2437 /*
2438 * [ WFC: Legal Character ]
2439 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002440 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002441 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002442 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002443 return(val);
2444 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002445 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2446 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2447 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002448 }
2449 return(0);
2450}
2451
2452/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002453 * xmlNewBlanksWrapperInputStream:
2454 * @ctxt: an XML parser context
2455 * @entity: an Entity pointer
2456 *
2457 * Create a new input stream for wrapping
2458 * blanks around a PEReference
2459 *
2460 * Returns the new input stream or NULL
2461 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002462
Daniel Veillardf5582f12002-06-11 10:08:16 +00002463static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002464
Daniel Veillardf4862f02002-09-10 11:13:43 +00002465static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002466xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2467 xmlParserInputPtr input;
2468 xmlChar *buffer;
2469 size_t length;
2470 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002471 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2472 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002473 return(NULL);
2474 }
2475 if (xmlParserDebugEntities)
2476 xmlGenericError(xmlGenericErrorContext,
2477 "new blanks wrapper for entity: %s\n", entity->name);
2478 input = xmlNewInputStream(ctxt);
2479 if (input == NULL) {
2480 return(NULL);
2481 }
2482 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002483 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002484 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002485 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002486 xmlFree(input);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002487 return(NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002488 }
2489 buffer [0] = ' ';
2490 buffer [1] = '%';
2491 buffer [length-3] = ';';
2492 buffer [length-2] = ' ';
2493 buffer [length-1] = 0;
2494 memcpy(buffer + 2, entity->name, length - 5);
2495 input->free = deallocblankswrapper;
2496 input->base = buffer;
2497 input->cur = buffer;
2498 input->length = length;
2499 input->end = &buffer[length];
2500 return(input);
2501}
2502
2503/**
Owen Taylor3473f882001-02-23 17:55:21 +00002504 * xmlParserHandlePEReference:
2505 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002506 *
Owen Taylor3473f882001-02-23 17:55:21 +00002507 * [69] PEReference ::= '%' Name ';'
2508 *
2509 * [ WFC: No Recursion ]
2510 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002511 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002512 *
2513 * [ WFC: Entity Declared ]
2514 * In a document without any DTD, a document with only an internal DTD
2515 * subset which contains no parameter entity references, or a document
2516 * with "standalone='yes'", ... ... The declaration of a parameter
2517 * entity must precede any reference to it...
2518 *
2519 * [ VC: Entity Declared ]
2520 * In a document with an external subset or external parameter entities
2521 * with "standalone='no'", ... ... The declaration of a parameter entity
2522 * must precede any reference to it...
2523 *
2524 * [ WFC: In DTD ]
2525 * Parameter-entity references may only appear in the DTD.
2526 * NOTE: misleading but this is handled.
2527 *
2528 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002529 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002530 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002531 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002532 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002533 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002534 */
2535void
2536xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002537 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002538 xmlEntityPtr entity = NULL;
2539 xmlParserInputPtr input;
2540
Owen Taylor3473f882001-02-23 17:55:21 +00002541 if (RAW != '%') return;
2542 switch(ctxt->instate) {
2543 case XML_PARSER_CDATA_SECTION:
2544 return;
2545 case XML_PARSER_COMMENT:
2546 return;
2547 case XML_PARSER_START_TAG:
2548 return;
2549 case XML_PARSER_END_TAG:
2550 return;
2551 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002552 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002553 return;
2554 case XML_PARSER_PROLOG:
2555 case XML_PARSER_START:
2556 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002557 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002558 return;
2559 case XML_PARSER_ENTITY_DECL:
2560 case XML_PARSER_CONTENT:
2561 case XML_PARSER_ATTRIBUTE_VALUE:
2562 case XML_PARSER_PI:
2563 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002564 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002565 /* we just ignore it there */
2566 return;
2567 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002568 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002569 return;
2570 case XML_PARSER_ENTITY_VALUE:
2571 /*
2572 * NOTE: in the case of entity values, we don't do the
2573 * substitution here since we need the literal
2574 * entity value to be able to save the internal
2575 * subset of the document.
2576 * This will be handled by xmlStringDecodeEntities
2577 */
2578 return;
2579 case XML_PARSER_DTD:
2580 /*
2581 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2582 * In the internal DTD subset, parameter-entity references
2583 * can occur only where markup declarations can occur, not
2584 * within markup declarations.
2585 * In that case this is handled in xmlParseMarkupDecl
2586 */
2587 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2588 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002589 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002590 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002591 break;
2592 case XML_PARSER_IGNORE:
2593 return;
2594 }
2595
2596 NEXT;
2597 name = xmlParseName(ctxt);
2598 if (xmlParserDebugEntities)
2599 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002600 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002601 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002602 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002603 } else {
2604 if (RAW == ';') {
2605 NEXT;
2606 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2607 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillarde50ba812013-04-11 15:54:51 +08002608 if (ctxt->instate == XML_PARSER_EOF)
2609 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002610 if (entity == NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002611
Owen Taylor3473f882001-02-23 17:55:21 +00002612 /*
2613 * [ WFC: Entity Declared ]
2614 * In a document without any DTD, a document with only an
2615 * internal DTD subset which contains no parameter entity
2616 * references, or a document with "standalone='yes'", ...
2617 * ... The declaration of a parameter entity must precede
2618 * any reference to it...
2619 */
2620 if ((ctxt->standalone == 1) ||
2621 ((ctxt->hasExternalSubset == 0) &&
2622 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002623 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002624 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002625 } else {
2626 /*
2627 * [ VC: Entity Declared ]
2628 * In a document with an external subset or external
2629 * parameter entities with "standalone='no'", ...
2630 * ... The declaration of a parameter entity must precede
2631 * any reference to it...
2632 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002633 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2634 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2635 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002636 name, NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002637 } else
Daniel Veillard24eb9782003-10-04 21:08:09 +00002638 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2639 "PEReference: %%%s; not found\n",
2640 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002641 ctxt->valid = 0;
2642 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002643 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002644 } else if (ctxt->input->free != deallocblankswrapper) {
2645 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002646 if (xmlPushInput(ctxt, input) < 0)
2647 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002648 } else {
2649 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2650 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002651 xmlChar start[4];
2652 xmlCharEncoding enc;
2653
Owen Taylor3473f882001-02-23 17:55:21 +00002654 /*
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002655 * Note: external parameter entities will not be loaded, it
2656 * is not required for a non-validating parser, unless the
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002657 * option of validating, or substituting entities were
2658 * given. Doing so is far more secure as the parser will
2659 * only process data coming from the document entity by
2660 * default.
2661 */
2662 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2663 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2664 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002665 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2666 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2667 (ctxt->replaceEntities == 0) &&
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002668 (ctxt->validate == 0))
2669 return;
2670
2671 /*
Owen Taylor3473f882001-02-23 17:55:21 +00002672 * handle the extra spaces added before and after
2673 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002674 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002675 */
2676 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002677 if (xmlPushInput(ctxt, input) < 0)
2678 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002679
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002680 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +00002681 * Get the 4 first bytes and decode the charset
2682 * if enc != XML_CHAR_ENCODING_NONE
2683 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002684 * Note that, since we may have some non-UTF8
2685 * encoding (like UTF16, bug 135229), the 'length'
2686 * is not known, but we can calculate based upon
2687 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002688 */
2689 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002690 if (ctxt->instate == XML_PARSER_EOF)
2691 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002692 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002693 start[0] = RAW;
2694 start[1] = NXT(1);
2695 start[2] = NXT(2);
2696 start[3] = NXT(3);
2697 enc = xmlDetectCharEncoding(start, 4);
2698 if (enc != XML_CHAR_ENCODING_NONE) {
2699 xmlSwitchEncoding(ctxt, enc);
2700 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002701 }
2702
Owen Taylor3473f882001-02-23 17:55:21 +00002703 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002704 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2705 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002706 xmlParseTextDecl(ctxt);
2707 }
Owen Taylor3473f882001-02-23 17:55:21 +00002708 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002709 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2710 "PEReference: %s is not a parameter entity\n",
2711 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002712 }
2713 }
2714 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002715 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002716 }
Owen Taylor3473f882001-02-23 17:55:21 +00002717 }
2718}
2719
/*
 * Macro used to grow the current buffer to twice its size plus @n bytes.
 * buffer##_size is expected to be a size_t variable in scope; it is
 * updated together with @buffer on success.
 * mem_error: is expected to be a label in the enclosing function that
 * handles memory allocation failures; it is also the target when the new
 * size computation wraps around. On failure @buffer is left unchanged.
 * @n is parenthesized so that any expression (e.g. a conditional) can be
 * passed without being torn apart by operator precedence.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2734
2735/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002736 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002737 * @ctxt: the parser context
2738 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002739 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002740 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2741 * @end: an end marker xmlChar, 0 if none
2742 * @end2: an end marker xmlChar, 0 if none
2743 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002744 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002745 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002746 *
2747 * [67] Reference ::= EntityRef | CharRef
2748 *
2749 * [69] PEReference ::= '%' Name ';'
2750 *
2751 * Returns A newly allocated string with the substitution done. The caller
2752 * must deallocate it !
2753 */
2754xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002755xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2756 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002757 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002758 size_t buffer_size = 0;
2759 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002760
2761 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002762 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002763 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002764 xmlEntityPtr ent;
2765 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002766
Daniel Veillarda82b1822004-11-08 16:24:57 +00002767 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002768 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002769 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002770
Daniel Veillard0161e632008-08-28 15:36:32 +00002771 if (((ctxt->depth > 40) &&
2772 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2773 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002774 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002775 return(NULL);
2776 }
2777
2778 /*
2779 * allocate a translation buffer.
2780 */
2781 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002782 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002783 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002784
2785 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002786 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002787 * we are operating on already parsed values.
2788 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002789 if (str < last)
2790 c = CUR_SCHAR(str, l);
2791 else
2792 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002793 while ((c != 0) && (c != end) && /* non input consuming loop */
2794 (c != end2) && (c != end3)) {
2795
2796 if (c == 0) break;
2797 if ((c == '&') && (str[1] == '#')) {
2798 int val = xmlParseStringCharRef(ctxt, &str);
2799 if (val != 0) {
2800 COPY_BUF(0,buffer,nbchars,val);
2801 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002802 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002803 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002804 }
Owen Taylor3473f882001-02-23 17:55:21 +00002805 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2806 if (xmlParserDebugEntities)
2807 xmlGenericError(xmlGenericErrorContext,
2808 "String decoding Entity Reference: %.30s\n",
2809 str);
2810 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002811 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2812 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002813 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002814 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002815 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002816 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002817 if ((ent != NULL) &&
2818 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2819 if (ent->content != NULL) {
2820 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002821 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002822 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002823 }
Owen Taylor3473f882001-02-23 17:55:21 +00002824 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002825 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2826 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002827 }
2828 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002829 ctxt->depth++;
2830 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2831 0, 0, 0);
2832 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002833
David Drysdale69030712015-11-20 11:13:45 +08002834 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2835 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2836 goto int_error;
2837
Owen Taylor3473f882001-02-23 17:55:21 +00002838 if (rep != NULL) {
2839 current = rep;
2840 while (*current != 0) { /* non input consuming loop */
2841 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002842 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002843 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002844 goto int_error;
2845 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002846 }
2847 }
2848 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002849 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002850 }
2851 } else if (ent != NULL) {
2852 int i = xmlStrlen(ent->name);
2853 const xmlChar *cur = ent->name;
2854
2855 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002856 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002857 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002858 }
2859 for (;i > 0;i--)
2860 buffer[nbchars++] = *cur++;
2861 buffer[nbchars++] = ';';
2862 }
2863 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2864 if (xmlParserDebugEntities)
2865 xmlGenericError(xmlGenericErrorContext,
2866 "String decoding PE Reference: %.30s\n", str);
2867 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002868 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2869 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002870 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002871 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002872 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002873 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002874 if (ent->content == NULL) {
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002875 /*
2876 * Note: external parsed entities will not be loaded,
2877 * it is not required for a non-validating parser to
2878 * complete external PEreferences coming from the
2879 * internal subset
2880 */
2881 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2882 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2883 (ctxt->validate != 0)) {
2884 xmlLoadEntityContent(ctxt, ent);
2885 } else {
2886 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2887 "not validating will not read content for PE entity %s\n",
2888 ent->name, NULL);
2889 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002890 }
Owen Taylor3473f882001-02-23 17:55:21 +00002891 ctxt->depth++;
2892 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2893 0, 0, 0);
2894 ctxt->depth--;
2895 if (rep != NULL) {
2896 current = rep;
2897 while (*current != 0) { /* non input consuming loop */
2898 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002899 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002900 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002901 goto int_error;
2902 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002903 }
2904 }
2905 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002906 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002907 }
2908 }
2909 } else {
2910 COPY_BUF(l,buffer,nbchars,c);
2911 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002912 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2913 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002914 }
2915 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002916 if (str < last)
2917 c = CUR_SCHAR(str, l);
2918 else
2919 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002920 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002921 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002922 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002923
2924mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002925 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002926int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002927 if (rep != NULL)
2928 xmlFree(rep);
2929 if (buffer != NULL)
2930 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002931 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002932}
2933
Daniel Veillarde57ec792003-09-10 10:50:59 +00002934/**
2935 * xmlStringDecodeEntities:
2936 * @ctxt: the parser context
2937 * @str: the input string
2938 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2939 * @end: an end marker xmlChar, 0 if none
2940 * @end2: an end marker xmlChar, 0 if none
2941 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002942 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002943 * Takes a entity string content and process to do the adequate substitutions.
2944 *
2945 * [67] Reference ::= EntityRef | CharRef
2946 *
2947 * [69] PEReference ::= '%' Name ';'
2948 *
2949 * Returns A newly allocated string with the substitution done. The caller
2950 * must deallocate it !
2951 */
2952xmlChar *
2953xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2954 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002955 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002956 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2957 end, end2, end3));
2958}
Owen Taylor3473f882001-02-23 17:55:21 +00002959
2960/************************************************************************
2961 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002962 * Commodity functions, cleanup needed ? *
2963 * *
2964 ************************************************************************/
2965
2966/**
2967 * areBlanks:
2968 * @ctxt: an XML parser context
2969 * @str: a xmlChar *
2970 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002971 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002972 *
2973 * Is this a sequence of blank chars that one can ignore ?
2974 *
2975 * Returns 1 if ignorable 0 otherwise.
2976 */
2977
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002978static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2979 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002980 int i, ret;
2981 xmlNodePtr lastChild;
2982
Daniel Veillard05c13a22001-09-09 08:38:09 +00002983 /*
2984 * Don't spend time trying to differentiate them, the same callback is
2985 * used !
2986 */
2987 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002988 return(0);
2989
Owen Taylor3473f882001-02-23 17:55:21 +00002990 /*
2991 * Check for xml:space value.
2992 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002993 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2994 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002995 return(0);
2996
2997 /*
2998 * Check that the string is made of blanks
2999 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003000 if (blank_chars == 0) {
3001 for (i = 0;i < len;i++)
3002 if (!(IS_BLANK_CH(str[i]))) return(0);
3003 }
Owen Taylor3473f882001-02-23 17:55:21 +00003004
3005 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003006 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00003007 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00003008 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00003009 if (ctxt->myDoc != NULL) {
3010 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3011 if (ret == 0) return(1);
3012 if (ret == 1) return(0);
3013 }
3014
3015 /*
3016 * Otherwise, heuristic :-\
3017 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00003018 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00003019 if ((ctxt->node->children == NULL) &&
3020 (RAW == '<') && (NXT(1) == '/')) return(0);
3021
3022 lastChild = xmlGetLastChild(ctxt->node);
3023 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00003024 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
3025 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00003026 } else if (xmlNodeIsText(lastChild))
3027 return(0);
3028 else if ((ctxt->node->children != NULL) &&
3029 (xmlNodeIsText(ctxt->node->children)))
3030 return(0);
3031 return(1);
3032}
3033
Owen Taylor3473f882001-02-23 17:55:21 +00003034/************************************************************************
3035 * *
3036 * Extra stuff for namespace support *
3037 * Relates to http://www.w3.org/TR/WD-xml-names *
3038 * *
3039 ************************************************************************/
3040
3041/**
3042 * xmlSplitQName:
3043 * @ctxt: an XML parser context
3044 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003045 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00003046 *
3047 * parse an UTF8 encoded XML qualified name string
3048 *
3049 * [NS 5] QName ::= (Prefix ':')? LocalPart
3050 *
3051 * [NS 6] Prefix ::= NCName
3052 *
3053 * [NS 7] LocalPart ::= NCName
3054 *
3055 * Returns the local part, and prefix is updated
3056 * to get the Prefix if any.
3057 */
3058
3059xmlChar *
3060xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3061 xmlChar buf[XML_MAX_NAMELEN + 5];
3062 xmlChar *buffer = NULL;
3063 int len = 0;
3064 int max = XML_MAX_NAMELEN;
3065 xmlChar *ret = NULL;
3066 const xmlChar *cur = name;
3067 int c;
3068
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003069 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003070 *prefix = NULL;
3071
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00003072 if (cur == NULL) return(NULL);
3073
Owen Taylor3473f882001-02-23 17:55:21 +00003074#ifndef XML_XML_NAMESPACE
3075 /* xml: prefix is not really a namespace */
3076 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3077 (cur[2] == 'l') && (cur[3] == ':'))
3078 return(xmlStrdup(name));
3079#endif
3080
Daniel Veillard597bc482003-07-24 16:08:28 +00003081 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00003082 if (cur[0] == ':')
3083 return(xmlStrdup(name));
3084
3085 c = *cur++;
3086 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3087 buf[len++] = c;
3088 c = *cur++;
3089 }
3090 if (len >= max) {
3091 /*
3092 * Okay someone managed to make a huge name, so he's ready to pay
3093 * for the processing speed.
3094 */
3095 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003096
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003097 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003098 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003099 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003100 return(NULL);
3101 }
3102 memcpy(buffer, buf, len);
3103 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3104 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003105 xmlChar *tmp;
3106
Owen Taylor3473f882001-02-23 17:55:21 +00003107 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003108 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003109 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003110 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00003111 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003112 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003113 return(NULL);
3114 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003115 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003116 }
3117 buffer[len++] = c;
3118 c = *cur++;
3119 }
3120 buffer[len] = 0;
3121 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003122
Daniel Veillard597bc482003-07-24 16:08:28 +00003123 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003124 if (buffer != NULL)
3125 xmlFree(buffer);
3126 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003127 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003128 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003129
Owen Taylor3473f882001-02-23 17:55:21 +00003130 if (buffer == NULL)
3131 ret = xmlStrndup(buf, len);
3132 else {
3133 ret = buffer;
3134 buffer = NULL;
3135 max = XML_MAX_NAMELEN;
3136 }
3137
3138
3139 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003140 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003141 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003142 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003143 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003144 }
Owen Taylor3473f882001-02-23 17:55:21 +00003145 len = 0;
3146
Daniel Veillardbb284f42002-10-16 18:02:47 +00003147 /*
3148 * Check that the first character is proper to start
3149 * a new name
3150 */
3151 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3152 ((c >= 0x41) && (c <= 0x5A)) ||
3153 (c == '_') || (c == ':'))) {
3154 int l;
3155 int first = CUR_SCHAR(cur, l);
3156
3157 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003158 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003159 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003160 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003161 }
3162 }
3163 cur++;
3164
Owen Taylor3473f882001-02-23 17:55:21 +00003165 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3166 buf[len++] = c;
3167 c = *cur++;
3168 }
3169 if (len >= max) {
3170 /*
3171 * Okay someone managed to make a huge name, so he's ready to pay
3172 * for the processing speed.
3173 */
3174 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003175
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003176 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003177 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003178 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003179 return(NULL);
3180 }
3181 memcpy(buffer, buf, len);
3182 while (c != 0) { /* tested bigname2.xml */
3183 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003184 xmlChar *tmp;
3185
Owen Taylor3473f882001-02-23 17:55:21 +00003186 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003187 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003188 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003189 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003190 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003191 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003192 return(NULL);
3193 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003194 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003195 }
3196 buffer[len++] = c;
3197 c = *cur++;
3198 }
3199 buffer[len] = 0;
3200 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003201
Owen Taylor3473f882001-02-23 17:55:21 +00003202 if (buffer == NULL)
3203 ret = xmlStrndup(buf, len);
3204 else {
3205 ret = buffer;
3206 }
3207 }
3208
3209 return(ret);
3210}
3211
3212/************************************************************************
3213 * *
3214 * The parser itself *
3215 * Relates to http://www.w3.org/TR/REC-xml *
3216 * *
3217 ************************************************************************/
3218
Daniel Veillard34e3f642008-07-29 09:02:27 +00003219/************************************************************************
3220 * *
3221 * Routines to parse Name, NCName and NmToken *
3222 * *
3223 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled in when
 * DEBUG is defined. Each parsing routine bumps its own counter on entry.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3232
Daniel Veillard34e3f642008-07-29 09:02:27 +00003233/*
3234 * The two following functions are related to the change of accepted
3235 * characters for Name and NmToken in the Revision 5 of XML-1.0
3236 * They correspond to the modified production [4] and the new production [4a]
3237 * changes in that revision. Also note that the macros used for the
3238 * productions Letter, Digit, CombiningChar and Extender are not needed
3239 * anymore.
3240 * We still keep compatibility to pre-revision5 parsing semantic if the
3241 * new XML_PARSE_OLD10 option is given to the parser.
3242 */
3243static int
3244xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3245 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3246 /*
3247 * Use the new checks of production [4] [4a] amd [5] of the
3248 * Update 5 of XML-1.0
3249 */
3250 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3251 (((c >= 'a') && (c <= 'z')) ||
3252 ((c >= 'A') && (c <= 'Z')) ||
3253 (c == '_') || (c == ':') ||
3254 ((c >= 0xC0) && (c <= 0xD6)) ||
3255 ((c >= 0xD8) && (c <= 0xF6)) ||
3256 ((c >= 0xF8) && (c <= 0x2FF)) ||
3257 ((c >= 0x370) && (c <= 0x37D)) ||
3258 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3259 ((c >= 0x200C) && (c <= 0x200D)) ||
3260 ((c >= 0x2070) && (c <= 0x218F)) ||
3261 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3262 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3263 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3264 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3265 ((c >= 0x10000) && (c <= 0xEFFFF))))
3266 return(1);
3267 } else {
3268 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3269 return(1);
3270 }
3271 return(0);
3272}
3273
3274static int
3275xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3276 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3277 /*
3278 * Use the new checks of production [4] [4a] amd [5] of the
3279 * Update 5 of XML-1.0
3280 */
3281 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3282 (((c >= 'a') && (c <= 'z')) ||
3283 ((c >= 'A') && (c <= 'Z')) ||
3284 ((c >= '0') && (c <= '9')) || /* !start */
3285 (c == '_') || (c == ':') ||
3286 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3287 ((c >= 0xC0) && (c <= 0xD6)) ||
3288 ((c >= 0xD8) && (c <= 0xF6)) ||
3289 ((c >= 0xF8) && (c <= 0x2FF)) ||
3290 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3291 ((c >= 0x370) && (c <= 0x37D)) ||
3292 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3293 ((c >= 0x200C) && (c <= 0x200D)) ||
3294 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3295 ((c >= 0x2070) && (c <= 0x218F)) ||
3296 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3297 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3298 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3299 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3300 ((c >= 0x10000) && (c <= 0xEFFFF))))
3301 return(1);
3302 } else {
3303 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3304 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003305 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003306 (IS_COMBINING(c)) ||
3307 (IS_EXTENDER(c)))
3308 return(1);
3309 }
3310 return(0);
3311}
3312
/*
 * Forward declaration: the function is static, so its definition lives
 * elsewhere in this translation unit.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003315
Daniel Veillard34e3f642008-07-29 09:02:27 +00003316static const xmlChar *
3317xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3318 int len = 0, l;
3319 int c;
3320 int count = 0;
3321
Daniel Veillardc6561462009-03-25 10:22:31 +00003322#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003323 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003324#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003325
3326 /*
3327 * Handler for more complex cases
3328 */
3329 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003330 if (ctxt->instate == XML_PARSER_EOF)
3331 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003332 c = CUR_CHAR(l);
3333 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3334 /*
3335 * Use the new checks of production [4] [4a] amd [5] of the
3336 * Update 5 of XML-1.0
3337 */
3338 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3339 (!(((c >= 'a') && (c <= 'z')) ||
3340 ((c >= 'A') && (c <= 'Z')) ||
3341 (c == '_') || (c == ':') ||
3342 ((c >= 0xC0) && (c <= 0xD6)) ||
3343 ((c >= 0xD8) && (c <= 0xF6)) ||
3344 ((c >= 0xF8) && (c <= 0x2FF)) ||
3345 ((c >= 0x370) && (c <= 0x37D)) ||
3346 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3347 ((c >= 0x200C) && (c <= 0x200D)) ||
3348 ((c >= 0x2070) && (c <= 0x218F)) ||
3349 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3350 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3351 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3352 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3353 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3354 return(NULL);
3355 }
3356 len += l;
3357 NEXTL(l);
3358 c = CUR_CHAR(l);
3359 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3360 (((c >= 'a') && (c <= 'z')) ||
3361 ((c >= 'A') && (c <= 'Z')) ||
3362 ((c >= '0') && (c <= '9')) || /* !start */
3363 (c == '_') || (c == ':') ||
3364 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3365 ((c >= 0xC0) && (c <= 0xD6)) ||
3366 ((c >= 0xD8) && (c <= 0xF6)) ||
3367 ((c >= 0xF8) && (c <= 0x2FF)) ||
3368 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3369 ((c >= 0x370) && (c <= 0x37D)) ||
3370 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3371 ((c >= 0x200C) && (c <= 0x200D)) ||
3372 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3373 ((c >= 0x2070) && (c <= 0x218F)) ||
3374 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3375 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3376 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3377 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3378 ((c >= 0x10000) && (c <= 0xEFFFF))
3379 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003380 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003381 count = 0;
3382 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003383 if (ctxt->instate == XML_PARSER_EOF)
3384 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003385 }
3386 len += l;
3387 NEXTL(l);
3388 c = CUR_CHAR(l);
3389 }
3390 } else {
3391 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3392 (!IS_LETTER(c) && (c != '_') &&
3393 (c != ':'))) {
3394 return(NULL);
3395 }
3396 len += l;
3397 NEXTL(l);
3398 c = CUR_CHAR(l);
3399
3400 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3401 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3402 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003403 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003404 (IS_COMBINING(c)) ||
3405 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003406 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003407 count = 0;
3408 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003409 if (ctxt->instate == XML_PARSER_EOF)
3410 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003411 }
3412 len += l;
3413 NEXTL(l);
3414 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003415 if (c == 0) {
3416 count = 0;
3417 GROW;
3418 if (ctxt->instate == XML_PARSER_EOF)
3419 return(NULL);
3420 c = CUR_CHAR(l);
3421 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003422 }
3423 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003424 if ((len > XML_MAX_NAME_LENGTH) &&
3425 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3426 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3427 return(NULL);
3428 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003429 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3430 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3431 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3432}
3433
Owen Taylor3473f882001-02-23 17:55:21 +00003434/**
3435 * xmlParseName:
3436 * @ctxt: an XML parser context
3437 *
3438 * parse an XML name.
3439 *
3440 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3441 * CombiningChar | Extender
3442 *
3443 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3444 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003445 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003446 *
3447 * Returns the Name parsed or NULL
3448 */
3449
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003450const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003451xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003452 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003453 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003454 int count = 0;
3455
3456 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003457
Daniel Veillardc6561462009-03-25 10:22:31 +00003458#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003459 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003460#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003461
Daniel Veillard48b2f892001-02-25 16:11:03 +00003462 /*
3463 * Accelerator for simple ASCII names
3464 */
3465 in = ctxt->input->cur;
3466 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3467 ((*in >= 0x41) && (*in <= 0x5A)) ||
3468 (*in == '_') || (*in == ':')) {
3469 in++;
3470 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3471 ((*in >= 0x41) && (*in <= 0x5A)) ||
3472 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003473 (*in == '_') || (*in == '-') ||
3474 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003475 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003476 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003477 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003478 if ((count > XML_MAX_NAME_LENGTH) &&
3479 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3480 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3481 return(NULL);
3482 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003483 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003484 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003485 ctxt->nbChars += count;
3486 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003487 if (ret == NULL)
3488 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003489 return(ret);
3490 }
3491 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003492 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003493 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003494}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003495
Daniel Veillard34e3f642008-07-29 09:02:27 +00003496static const xmlChar *
3497xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3498 int len = 0, l;
3499 int c;
3500 int count = 0;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003501 size_t startPosition = 0;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003502
Daniel Veillardc6561462009-03-25 10:22:31 +00003503#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003504 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003505#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003506
3507 /*
3508 * Handler for more complex cases
3509 */
3510 GROW;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003511 startPosition = CUR_PTR - BASE_PTR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003512 c = CUR_CHAR(l);
3513 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3514 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3515 return(NULL);
3516 }
3517
3518 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3519 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003520 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003521 if ((len > XML_MAX_NAME_LENGTH) &&
3522 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3523 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3524 return(NULL);
3525 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003526 count = 0;
3527 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003528 if (ctxt->instate == XML_PARSER_EOF)
3529 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003530 }
3531 len += l;
3532 NEXTL(l);
3533 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003534 if (c == 0) {
3535 count = 0;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003536 /*
3537 * when shrinking to extend the buffer we really need to preserve
3538 * the part of the name we already parsed. Hence rolling back
3539 * by current lenght.
3540 */
3541 ctxt->input->cur -= l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003542 GROW;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003543 ctxt->input->cur += l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003544 if (ctxt->instate == XML_PARSER_EOF)
3545 return(NULL);
3546 c = CUR_CHAR(l);
3547 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003548 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003549 if ((len > XML_MAX_NAME_LENGTH) &&
3550 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3551 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3552 return(NULL);
3553 }
Pranjal Jumde45752d22016-03-03 11:50:34 -08003554 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003555}
3556
3557/**
3558 * xmlParseNCName:
3559 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003560 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003561 *
3562 * parse an XML name.
3563 *
3564 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3565 * CombiningChar | Extender
3566 *
3567 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3568 *
3569 * Returns the Name parsed or NULL
3570 */
3571
3572static const xmlChar *
3573xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003574 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003575 const xmlChar *ret;
3576 int count = 0;
3577
Daniel Veillardc6561462009-03-25 10:22:31 +00003578#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003579 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003580#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003581
3582 /*
3583 * Accelerator for simple ASCII names
3584 */
3585 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003586 e = ctxt->input->end;
3587 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3588 ((*in >= 0x41) && (*in <= 0x5A)) ||
3589 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003590 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003591 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3592 ((*in >= 0x41) && (*in <= 0x5A)) ||
3593 ((*in >= 0x30) && (*in <= 0x39)) ||
3594 (*in == '_') || (*in == '-') ||
3595 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003596 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003597 if (in >= e)
3598 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003599 if ((*in > 0) && (*in < 0x80)) {
3600 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003601 if ((count > XML_MAX_NAME_LENGTH) &&
3602 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3603 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3604 return(NULL);
3605 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003606 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3607 ctxt->input->cur = in;
3608 ctxt->nbChars += count;
3609 ctxt->input->col += count;
3610 if (ret == NULL) {
3611 xmlErrMemory(ctxt, NULL);
3612 }
3613 return(ret);
3614 }
3615 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003616complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003617 return(xmlParseNCNameComplex(ctxt));
3618}
3619
Daniel Veillard46de64e2002-05-29 08:21:33 +00003620/**
3621 * xmlParseNameAndCompare:
3622 * @ctxt: an XML parser context
3623 *
3624 * parse an XML name and compares for match
3625 * (specialized for endtag parsing)
3626 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003627 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3628 * and the name for mismatch
3629 */
3630
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003631static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003632xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003633 register const xmlChar *cmp = other;
3634 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003635 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003636
3637 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003638 if (ctxt->instate == XML_PARSER_EOF)
3639 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003640
Daniel Veillard46de64e2002-05-29 08:21:33 +00003641 in = ctxt->input->cur;
3642 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003643 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003644 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003645 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003646 }
William M. Brack76e95df2003-10-18 16:20:14 +00003647 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003648 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003649 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003650 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003651 }
3652 /* failure (or end of input buffer), check with full function */
3653 ret = xmlParseName (ctxt);
Jan Pokornýbb654fe2016-04-13 16:56:07 +02003654 /* strings coming from the dictionary direct compare possible */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003655 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003656 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003657 }
3658 return ret;
3659}
3660
Owen Taylor3473f882001-02-23 17:55:21 +00003661/**
3662 * xmlParseStringName:
3663 * @ctxt: an XML parser context
3664 * @str: a pointer to the string pointer (IN/OUT)
3665 *
3666 * parse an XML name.
3667 *
3668 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3669 * CombiningChar | Extender
3670 *
3671 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3672 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003673 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003674 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003675 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003676 * is updated to the current location in the string.
3677 */
3678
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003679static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003680xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3681 xmlChar buf[XML_MAX_NAMELEN + 5];
3682 const xmlChar *cur = *str;
3683 int len = 0, l;
3684 int c;
3685
Daniel Veillardc6561462009-03-25 10:22:31 +00003686#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003687 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003688#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003689
Owen Taylor3473f882001-02-23 17:55:21 +00003690 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003691 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003692 return(NULL);
3693 }
3694
Daniel Veillard34e3f642008-07-29 09:02:27 +00003695 COPY_BUF(l,buf,len,c);
3696 cur += l;
3697 c = CUR_SCHAR(cur, l);
3698 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003699 COPY_BUF(l,buf,len,c);
3700 cur += l;
3701 c = CUR_SCHAR(cur, l);
3702 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3703 /*
3704 * Okay someone managed to make a huge name, so he's ready to pay
3705 * for the processing speed.
3706 */
3707 xmlChar *buffer;
3708 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003709
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003710 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003711 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003712 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003713 return(NULL);
3714 }
3715 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003716 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003717 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003718 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003719
3720 if ((len > XML_MAX_NAME_LENGTH) &&
3721 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3722 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3723 xmlFree(buffer);
3724 return(NULL);
3725 }
Owen Taylor3473f882001-02-23 17:55:21 +00003726 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003727 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003728 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003729 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003730 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003731 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003732 return(NULL);
3733 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003734 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003735 }
3736 COPY_BUF(l,buffer,len,c);
3737 cur += l;
3738 c = CUR_SCHAR(cur, l);
3739 }
3740 buffer[len] = 0;
3741 *str = cur;
3742 return(buffer);
3743 }
3744 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003745 if ((len > XML_MAX_NAME_LENGTH) &&
3746 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3747 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3748 return(NULL);
3749 }
Owen Taylor3473f882001-02-23 17:55:21 +00003750 *str = cur;
3751 return(xmlStrndup(buf, len));
3752}
3753
3754/**
3755 * xmlParseNmtoken:
3756 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003757 *
Owen Taylor3473f882001-02-23 17:55:21 +00003758 * parse an XML Nmtoken.
3759 *
3760 * [7] Nmtoken ::= (NameChar)+
3761 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003762 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003763 *
3764 * Returns the Nmtoken parsed or NULL
3765 */
3766
3767xmlChar *
3768xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3769 xmlChar buf[XML_MAX_NAMELEN + 5];
3770 int len = 0, l;
3771 int c;
3772 int count = 0;
3773
Daniel Veillardc6561462009-03-25 10:22:31 +00003774#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003775 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003776#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003777
Owen Taylor3473f882001-02-23 17:55:21 +00003778 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003779 if (ctxt->instate == XML_PARSER_EOF)
3780 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003781 c = CUR_CHAR(l);
3782
Daniel Veillard34e3f642008-07-29 09:02:27 +00003783 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003784 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003785 count = 0;
3786 GROW;
3787 }
3788 COPY_BUF(l,buf,len,c);
3789 NEXTL(l);
3790 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003791 if (c == 0) {
3792 count = 0;
3793 GROW;
3794 if (ctxt->instate == XML_PARSER_EOF)
3795 return(NULL);
3796 c = CUR_CHAR(l);
3797 }
Owen Taylor3473f882001-02-23 17:55:21 +00003798 if (len >= XML_MAX_NAMELEN) {
3799 /*
3800 * Okay someone managed to make a huge token, so he's ready to pay
3801 * for the processing speed.
3802 */
3803 xmlChar *buffer;
3804 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003805
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003806 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003807 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003808 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003809 return(NULL);
3810 }
3811 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003812 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003813 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003814 count = 0;
3815 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003816 if (ctxt->instate == XML_PARSER_EOF) {
3817 xmlFree(buffer);
3818 return(NULL);
3819 }
Owen Taylor3473f882001-02-23 17:55:21 +00003820 }
3821 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003822 xmlChar *tmp;
3823
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003824 if ((max > XML_MAX_NAME_LENGTH) &&
3825 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3826 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3827 xmlFree(buffer);
3828 return(NULL);
3829 }
Owen Taylor3473f882001-02-23 17:55:21 +00003830 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003831 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003832 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003833 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003834 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003835 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003836 return(NULL);
3837 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003838 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003839 }
3840 COPY_BUF(l,buffer,len,c);
3841 NEXTL(l);
3842 c = CUR_CHAR(l);
3843 }
3844 buffer[len] = 0;
3845 return(buffer);
3846 }
3847 }
3848 if (len == 0)
3849 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003850 if ((len > XML_MAX_NAME_LENGTH) &&
3851 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3852 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3853 return(NULL);
3854 }
Owen Taylor3473f882001-02-23 17:55:21 +00003855 return(xmlStrndup(buf, len));
3856}
3857
3858/**
3859 * xmlParseEntityValue:
3860 * @ctxt: an XML parser context
3861 * @orig: if non-NULL store a copy of the original entity value
3862 *
3863 * parse a value for ENTITY declarations
3864 *
3865 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3866 * "'" ([^%&'] | PEReference | Reference)* "'"
3867 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003868 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003869 */
3870
3871xmlChar *
3872xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3873 xmlChar *buf = NULL;
3874 int len = 0;
3875 int size = XML_PARSER_BUFFER_SIZE;
3876 int c, l;
3877 xmlChar stop;
3878 xmlChar *ret = NULL;
3879 const xmlChar *cur = NULL;
3880 xmlParserInputPtr input;
3881
3882 if (RAW == '"') stop = '"';
3883 else if (RAW == '\'') stop = '\'';
3884 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003885 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003886 return(NULL);
3887 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003888 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003889 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003890 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003891 return(NULL);
3892 }
3893
3894 /*
3895 * The content of the entity definition is copied in a buffer.
3896 */
3897
3898 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3899 input = ctxt->input;
3900 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003901 if (ctxt->instate == XML_PARSER_EOF) {
3902 xmlFree(buf);
3903 return(NULL);
3904 }
Owen Taylor3473f882001-02-23 17:55:21 +00003905 NEXT;
3906 c = CUR_CHAR(l);
3907 /*
3908 * NOTE: 4.4.5 Included in Literal
3909 * When a parameter entity reference appears in a literal entity
3910 * value, ... a single or double quote character in the replacement
3911 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003912 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003913 * In practice it means we stop the loop only when back at parsing
3914 * the initial entity and the quote is found
3915 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003916 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3917 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003918 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003919 xmlChar *tmp;
3920
Owen Taylor3473f882001-02-23 17:55:21 +00003921 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003922 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3923 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003924 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003925 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003926 return(NULL);
3927 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003928 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003929 }
3930 COPY_BUF(l,buf,len,c);
3931 NEXTL(l);
3932 /*
3933 * Pop-up of finished entities.
3934 */
3935 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3936 xmlPopInput(ctxt);
3937
3938 GROW;
3939 c = CUR_CHAR(l);
3940 if (c == 0) {
3941 GROW;
3942 c = CUR_CHAR(l);
3943 }
3944 }
3945 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003946 if (ctxt->instate == XML_PARSER_EOF) {
3947 xmlFree(buf);
3948 return(NULL);
3949 }
Owen Taylor3473f882001-02-23 17:55:21 +00003950
3951 /*
3952 * Raise problem w.r.t. '&' and '%' being used in non-entities
3953 * reference constructs. Note Charref will be handled in
3954 * xmlStringDecodeEntities()
3955 */
3956 cur = buf;
3957 while (*cur != 0) { /* non input consuming */
3958 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3959 xmlChar *name;
3960 xmlChar tmp = *cur;
3961
3962 cur++;
3963 name = xmlParseStringName(ctxt, &cur);
3964 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003965 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003966 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003967 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003968 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003969 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3970 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003971 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003972 }
3973 if (name != NULL)
3974 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003975 if (*cur == 0)
3976 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003977 }
3978 cur++;
3979 }
3980
3981 /*
3982 * Then PEReference entities are substituted.
3983 */
3984 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003985 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003986 xmlFree(buf);
3987 } else {
3988 NEXT;
3989 /*
3990 * NOTE: 4.4.7 Bypassed
3991 * When a general entity reference appears in the EntityValue in
3992 * an entity declaration, it is bypassed and left as is.
3993 * so XML_SUBSTITUTE_REF is not set here.
3994 */
Peter Simons8f30bdf2016-04-15 11:56:55 +02003995 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003996 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3997 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02003998 --ctxt->depth;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003999 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00004000 *orig = buf;
4001 else
4002 xmlFree(buf);
4003 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004004
Owen Taylor3473f882001-02-23 17:55:21 +00004005 return(ret);
4006}
4007
4008/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00004009 * xmlParseAttValueComplex:
4010 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00004011 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004012 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00004013 *
4014 * parse a value for an attribute, this is the fallback function
4015 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004016 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00004017 *
4018 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4019 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00004020static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004021xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00004022 xmlChar limit = 0;
4023 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004024 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004025 size_t len = 0;
4026 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004027 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004028 xmlChar *current = NULL;
4029 xmlEntityPtr ent;
4030
Owen Taylor3473f882001-02-23 17:55:21 +00004031 if (NXT(0) == '"') {
4032 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4033 limit = '"';
4034 NEXT;
4035 } else if (NXT(0) == '\'') {
4036 limit = '\'';
4037 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4038 NEXT;
4039 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004040 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004041 return(NULL);
4042 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00004043
Owen Taylor3473f882001-02-23 17:55:21 +00004044 /*
4045 * allocate a translation buffer.
4046 */
4047 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004048 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004049 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00004050
4051 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004052 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00004053 */
4054 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004055 while (((NXT(0) != limit) && /* checked */
4056 (IS_CHAR(c)) && (c != '<')) &&
4057 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08004058 /*
4059 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
4060 * special option is given
4061 */
4062 if ((len > XML_MAX_TEXT_LENGTH) &&
4063 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4064 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004065 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08004066 goto mem_error;
4067 }
Owen Taylor3473f882001-02-23 17:55:21 +00004068 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00004069 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00004070 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004071 if (NXT(1) == '#') {
4072 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004073
Owen Taylor3473f882001-02-23 17:55:21 +00004074 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00004075 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004076 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004077 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00004078 }
4079 buf[len++] = '&';
4080 } else {
4081 /*
4082 * The reparsing will be done in xmlStringGetNodeList()
4083 * called by the attribute() function in SAX.c
4084 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08004085 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004086 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00004087 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004088 buf[len++] = '&';
4089 buf[len++] = '#';
4090 buf[len++] = '3';
4091 buf[len++] = '8';
4092 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00004093 }
Daniel Veillarddc171602008-03-26 17:41:38 +00004094 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004095 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004096 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004097 }
Owen Taylor3473f882001-02-23 17:55:21 +00004098 len += xmlCopyChar(0, &buf[len], val);
4099 }
4100 } else {
4101 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00004102 ctxt->nbentities++;
4103 if (ent != NULL)
4104 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004105 if ((ent != NULL) &&
4106 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004107 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004108 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004109 }
4110 if ((ctxt->replaceEntities == 0) &&
4111 (ent->content[0] == '&')) {
4112 buf[len++] = '&';
4113 buf[len++] = '#';
4114 buf[len++] = '3';
4115 buf[len++] = '8';
4116 buf[len++] = ';';
4117 } else {
4118 buf[len++] = ent->content[0];
4119 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004120 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004121 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004122 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
Peter Simons8f30bdf2016-04-15 11:56:55 +02004123 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004124 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004125 XML_SUBSTITUTE_REF,
4126 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004127 --ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004128 if (rep != NULL) {
4129 current = rep;
4130 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004131 if ((*current == 0xD) || (*current == 0xA) ||
4132 (*current == 0x9)) {
4133 buf[len++] = 0x20;
4134 current++;
4135 } else
4136 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004137 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004138 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004139 }
4140 }
4141 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004142 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004143 }
4144 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004145 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004146 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004147 }
Owen Taylor3473f882001-02-23 17:55:21 +00004148 if (ent->content != NULL)
4149 buf[len++] = ent->content[0];
4150 }
4151 } else if (ent != NULL) {
4152 int i = xmlStrlen(ent->name);
4153 const xmlChar *cur = ent->name;
4154
4155 /*
4156 * This may look absurd but is needed to detect
4157 * entities problems
4158 */
4159 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004160 (ent->content != NULL) && (ent->checked == 0)) {
4161 unsigned long oldnbent = ctxt->nbentities;
4162
Peter Simons8f30bdf2016-04-15 11:56:55 +02004163 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004164 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004165 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004166 --ctxt->depth;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004167
Daniel Veillardcff25462013-03-11 15:57:55 +08004168 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004169 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004170 if (xmlStrchr(rep, '<'))
4171 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004172 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004173 rep = NULL;
4174 }
Owen Taylor3473f882001-02-23 17:55:21 +00004175 }
4176
4177 /*
4178 * Just output the reference
4179 */
4180 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004181 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004182 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004183 }
4184 for (;i > 0;i--)
4185 buf[len++] = *cur++;
4186 buf[len++] = ';';
4187 }
4188 }
4189 } else {
4190 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004191 if ((len != 0) || (!normalize)) {
4192 if ((!normalize) || (!in_space)) {
4193 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004194 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004195 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004196 }
4197 }
4198 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004199 }
4200 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004201 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004202 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004203 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004204 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004205 }
4206 }
4207 NEXTL(l);
4208 }
4209 GROW;
4210 c = CUR_CHAR(l);
4211 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004212 if (ctxt->instate == XML_PARSER_EOF)
4213 goto error;
4214
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004215 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004216 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004217 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004218 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004219 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004220 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004221 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004222 if ((c != 0) && (!IS_CHAR(c))) {
4223 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4224 "invalid character in attribute value\n");
4225 } else {
4226 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4227 "AttValue: ' expected\n");
4228 }
Owen Taylor3473f882001-02-23 17:55:21 +00004229 } else
4230 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004231
4232 /*
4233 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004234 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004235 */
4236 if (len >= INT_MAX) {
4237 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004238 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004239 goto mem_error;
4240 }
4241
4242 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004243 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004244
4245mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004246 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004247error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004248 if (buf != NULL)
4249 xmlFree(buf);
4250 if (rep != NULL)
4251 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004252 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004253}
4254
4255/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004256 * xmlParseAttValue:
4257 * @ctxt: an XML parser context
4258 *
4259 * parse a value for an attribute
4260 * Note: the parser won't do substitution of entities here, this
4261 * will be handled later in xmlStringGetNodeList
4262 *
4263 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4264 * "'" ([^<&'] | Reference)* "'"
4265 *
4266 * 3.3.3 Attribute-Value Normalization:
4267 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004268 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004269 * - a character reference is processed by appending the referenced
4270 * character to the attribute value
4271 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004272 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004273 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4274 * appending #x20 to the normalized value, except that only a single
4275 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004276 * parsed entity or the literal entity value of an internal parsed entity
4277 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004278 * If the declared value is not CDATA, then the XML processor must further
4279 * process the normalized attribute value by discarding any leading and
4280 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004281 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004282 * All attributes for which no declaration has been read should be treated
4283 * by a non-validating parser as if declared CDATA.
4284 *
4285 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4286 */
4287
4288
4289xmlChar *
4290xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004291 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004292 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004293}
4294
4295/**
Owen Taylor3473f882001-02-23 17:55:21 +00004296 * xmlParseSystemLiteral:
4297 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004298 *
Owen Taylor3473f882001-02-23 17:55:21 +00004299 * parse an XML Literal
4300 *
4301 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4302 *
4303 * Returns the SystemLiteral parsed or NULL
4304 */
4305
4306xmlChar *
4307xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4308 xmlChar *buf = NULL;
4309 int len = 0;
4310 int size = XML_PARSER_BUFFER_SIZE;
4311 int cur, l;
4312 xmlChar stop;
4313 int state = ctxt->instate;
4314 int count = 0;
4315
4316 SHRINK;
4317 if (RAW == '"') {
4318 NEXT;
4319 stop = '"';
4320 } else if (RAW == '\'') {
4321 NEXT;
4322 stop = '\'';
4323 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004324 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004325 return(NULL);
4326 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004327
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004328 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004329 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004330 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004331 return(NULL);
4332 }
4333 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4334 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004335 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004336 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004337 xmlChar *tmp;
4338
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004339 if ((size > XML_MAX_NAME_LENGTH) &&
4340 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4341 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4342 xmlFree(buf);
4343 ctxt->instate = (xmlParserInputState) state;
4344 return(NULL);
4345 }
Owen Taylor3473f882001-02-23 17:55:21 +00004346 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004347 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4348 if (tmp == NULL) {
4349 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004350 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004351 ctxt->instate = (xmlParserInputState) state;
4352 return(NULL);
4353 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004354 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004355 }
4356 count++;
4357 if (count > 50) {
4358 GROW;
4359 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004360 if (ctxt->instate == XML_PARSER_EOF) {
4361 xmlFree(buf);
4362 return(NULL);
4363 }
Owen Taylor3473f882001-02-23 17:55:21 +00004364 }
4365 COPY_BUF(l,buf,len,cur);
4366 NEXTL(l);
4367 cur = CUR_CHAR(l);
4368 if (cur == 0) {
4369 GROW;
4370 SHRINK;
4371 cur = CUR_CHAR(l);
4372 }
4373 }
4374 buf[len] = 0;
4375 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004376 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004377 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004378 } else {
4379 NEXT;
4380 }
4381 return(buf);
4382}
4383
4384/**
4385 * xmlParsePubidLiteral:
4386 * @ctxt: an XML parser context
4387 *
4388 * parse an XML public literal
4389 *
4390 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4391 *
4392 * Returns the PubidLiteral parsed or NULL.
4393 */
4394
4395xmlChar *
4396xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4397 xmlChar *buf = NULL;
4398 int len = 0;
4399 int size = XML_PARSER_BUFFER_SIZE;
4400 xmlChar cur;
4401 xmlChar stop;
4402 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004403 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004404
4405 SHRINK;
4406 if (RAW == '"') {
4407 NEXT;
4408 stop = '"';
4409 } else if (RAW == '\'') {
4410 NEXT;
4411 stop = '\'';
4412 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004413 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004414 return(NULL);
4415 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004416 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004417 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004418 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004419 return(NULL);
4420 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004421 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004422 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004423 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004424 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004425 xmlChar *tmp;
4426
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004427 if ((size > XML_MAX_NAME_LENGTH) &&
4428 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4429 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4430 xmlFree(buf);
4431 return(NULL);
4432 }
Owen Taylor3473f882001-02-23 17:55:21 +00004433 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004434 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4435 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004436 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004437 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004438 return(NULL);
4439 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004440 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004441 }
4442 buf[len++] = cur;
4443 count++;
4444 if (count > 50) {
4445 GROW;
4446 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004447 if (ctxt->instate == XML_PARSER_EOF) {
4448 xmlFree(buf);
4449 return(NULL);
4450 }
Owen Taylor3473f882001-02-23 17:55:21 +00004451 }
4452 NEXT;
4453 cur = CUR;
4454 if (cur == 0) {
4455 GROW;
4456 SHRINK;
4457 cur = CUR;
4458 }
4459 }
4460 buf[len] = 0;
4461 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004462 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004463 } else {
4464 NEXT;
4465 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004466 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004467 return(buf);
4468}
4469
Daniel Veillard8ed10722009-08-20 19:17:36 +02004470static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004471
/*
 * Lookup table used by the inner loop of the accelerated character data
 * scan. Entry N holds N itself when byte N may be consumed directly by
 * that loop, and 0 when the scan has to stop on it: control characters
 * other than TAB (which includes CR and LF), '&', '<', ']' and every
 * non-ASCII byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x09) is accepted, CR/LF are handled apart */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F: control characters */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, never handled by the fast path */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
Owen Taylor3473f882001-02-23 17:55:21 +00004510/**
4511 * xmlParseCharData:
4512 * @ctxt: an XML parser context
4513 * @cdata: int indicating whether we are within a CDATA section
4514 *
4515 * parse a CharData section.
4516 * if we are within a CDATA section ']]>' marks an end of section.
4517 *
4518 * The right angle bracket (>) may be represented using the string "&gt;",
4519 * and must, for compatibility, be escaped using "&gt;" or a character
4520 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004521 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004522 *
4523 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4524 */
4525
4526void
4527xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004528 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004529 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004530 int line = ctxt->input->line;
4531 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004532 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004533
4534 SHRINK;
4535 GROW;
4536 /*
4537 * Accelerated common case where input don't need to be
4538 * modified before passing it to the handler.
4539 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004540 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004541 in = ctxt->input->cur;
4542 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004543get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004544 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004545 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004546 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004547 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004548 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004549 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004550 goto get_more_space;
4551 }
4552 if (*in == '<') {
4553 nbchar = in - ctxt->input->cur;
4554 if (nbchar > 0) {
4555 const xmlChar *tmp = ctxt->input->cur;
4556 ctxt->input->cur = in;
4557
Daniel Veillard34099b42004-11-04 17:34:35 +00004558 if ((ctxt->sax != NULL) &&
4559 (ctxt->sax->ignorableWhitespace !=
4560 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004561 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004562 if (ctxt->sax->ignorableWhitespace != NULL)
4563 ctxt->sax->ignorableWhitespace(ctxt->userData,
4564 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004565 } else {
4566 if (ctxt->sax->characters != NULL)
4567 ctxt->sax->characters(ctxt->userData,
4568 tmp, nbchar);
4569 if (*ctxt->space == -1)
4570 *ctxt->space = -2;
4571 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004572 } else if ((ctxt->sax != NULL) &&
4573 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004574 ctxt->sax->characters(ctxt->userData,
4575 tmp, nbchar);
4576 }
4577 }
4578 return;
4579 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004580
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004581get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004582 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004583 while (test_char_data[*in]) {
4584 in++;
4585 ccol++;
4586 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004587 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004588 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004589 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004590 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004591 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004592 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004593 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004594 }
4595 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004596 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004597 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004598 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004599 return;
4600 }
4601 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004602 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004603 goto get_more;
4604 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004605 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004606 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004607 if ((ctxt->sax != NULL) &&
4608 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004609 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004610 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004611 const xmlChar *tmp = ctxt->input->cur;
4612 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004613
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004614 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004615 if (ctxt->sax->ignorableWhitespace != NULL)
4616 ctxt->sax->ignorableWhitespace(ctxt->userData,
4617 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004618 } else {
4619 if (ctxt->sax->characters != NULL)
4620 ctxt->sax->characters(ctxt->userData,
4621 tmp, nbchar);
4622 if (*ctxt->space == -1)
4623 *ctxt->space = -2;
4624 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004625 line = ctxt->input->line;
4626 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004627 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004628 if (ctxt->sax->characters != NULL)
4629 ctxt->sax->characters(ctxt->userData,
4630 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004631 line = ctxt->input->line;
4632 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004633 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004634 /* something really bad happened in the SAX callback */
4635 if (ctxt->instate != XML_PARSER_CONTENT)
4636 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004637 }
4638 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004639 if (*in == 0xD) {
4640 in++;
4641 if (*in == 0xA) {
4642 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004643 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004644 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004645 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004646 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004647 in--;
4648 }
4649 if (*in == '<') {
4650 return;
4651 }
4652 if (*in == '&') {
4653 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004654 }
4655 SHRINK;
4656 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004657 if (ctxt->instate == XML_PARSER_EOF)
4658 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004659 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004660 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004661 nbchar = 0;
4662 }
Daniel Veillard50582112001-03-26 22:52:16 +00004663 ctxt->input->line = line;
4664 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004665 xmlParseCharDataComplex(ctxt, cdata);
4666}
4667
Daniel Veillard01c13b52002-12-10 15:19:08 +00004668/**
4669 * xmlParseCharDataComplex:
4670 * @ctxt: an XML parser context
4671 * @cdata: int indicating whether we are within a CDATA section
4672 *
4673 * parse a CharData section.this is the fallback function
4674 * of xmlParseCharData() when the parsing requires handling
4675 * of non-ASCII characters.
4676 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004677static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004678xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004679 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4680 int nbchar = 0;
4681 int cur, l;
4682 int count = 0;
4683
4684 SHRINK;
4685 GROW;
4686 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004687 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004688 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004689 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004690 if ((cur == ']') && (NXT(1) == ']') &&
4691 (NXT(2) == '>')) {
4692 if (cdata) break;
4693 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004694 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004695 }
4696 }
4697 COPY_BUF(l,buf,nbchar,cur);
4698 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004699 buf[nbchar] = 0;
4700
Owen Taylor3473f882001-02-23 17:55:21 +00004701 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004702 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004703 */
4704 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004705 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004706 if (ctxt->sax->ignorableWhitespace != NULL)
4707 ctxt->sax->ignorableWhitespace(ctxt->userData,
4708 buf, nbchar);
4709 } else {
4710 if (ctxt->sax->characters != NULL)
4711 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004712 if ((ctxt->sax->characters !=
4713 ctxt->sax->ignorableWhitespace) &&
4714 (*ctxt->space == -1))
4715 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004716 }
4717 }
4718 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004719 /* something really bad happened in the SAX callback */
4720 if (ctxt->instate != XML_PARSER_CONTENT)
4721 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004722 }
4723 count++;
4724 if (count > 50) {
4725 GROW;
4726 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004727 if (ctxt->instate == XML_PARSER_EOF)
4728 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004729 }
4730 NEXTL(l);
4731 cur = CUR_CHAR(l);
4732 }
4733 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004734 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004735 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004736 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004737 */
4738 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004739 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004740 if (ctxt->sax->ignorableWhitespace != NULL)
4741 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4742 } else {
4743 if (ctxt->sax->characters != NULL)
4744 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004745 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4746 (*ctxt->space == -1))
4747 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004748 }
4749 }
4750 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004751 if ((cur != 0) && (!IS_CHAR(cur))) {
4752 /* Generate the error and skip the offending character */
4753 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4754 "PCDATA invalid Char value %d\n",
4755 cur);
4756 NEXTL(l);
4757 }
Owen Taylor3473f882001-02-23 17:55:21 +00004758}
4759
4760/**
4761 * xmlParseExternalID:
4762 * @ctxt: an XML parser context
4763 * @publicID: a xmlChar** receiving PubidLiteral
4764 * @strict: indicate whether we should restrict parsing to only
4765 * production [75], see NOTE below
4766 *
4767 * Parse an External ID or a Public ID
4768 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004769 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004770 * 'PUBLIC' S PubidLiteral S SystemLiteral
4771 *
4772 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4773 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4774 *
4775 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4776 *
4777 * Returns the function returns SystemLiteral and in the second
4778 * case publicID receives PubidLiteral, is strict is off
4779 * it is possible to return NULL and have publicID set.
4780 */
4781
4782xmlChar *
4783xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4784 xmlChar *URI = NULL;
4785
4786 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004787
4788 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004789 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004790 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004791 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004792 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4793 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004794 }
4795 SKIP_BLANKS;
4796 URI = xmlParseSystemLiteral(ctxt);
4797 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004798 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004799 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004800 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004801 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004802 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004803 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004804 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004805 }
4806 SKIP_BLANKS;
4807 *publicID = xmlParsePubidLiteral(ctxt);
4808 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004809 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004810 }
4811 if (strict) {
4812 /*
4813 * We don't handle [83] so "S SystemLiteral" is required.
4814 */
William M. Brack76e95df2003-10-18 16:20:14 +00004815 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004816 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004817 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004818 }
4819 } else {
4820 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004821 * We handle [83] so we return immediately, if
Owen Taylor3473f882001-02-23 17:55:21 +00004822 * "S SystemLiteral" is not detected. From a purely parsing
4823 * point of view that's a nice mess.
4824 */
4825 const xmlChar *ptr;
4826 GROW;
4827
4828 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004829 if (!IS_BLANK_CH(*ptr)) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004830
William M. Brack76e95df2003-10-18 16:20:14 +00004831 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004832 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4833 }
4834 SKIP_BLANKS;
4835 URI = xmlParseSystemLiteral(ctxt);
4836 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004837 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004838 }
4839 }
4840 return(URI);
4841}
4842
4843/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004844 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004845 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004846 * @buf: the already parsed part of the buffer
4847 * @len: number of bytes filles in the buffer
4848 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004849 *
4850 * Skip an XML (SGML) comment <!-- .... -->
4851 * The spec says that "For compatibility, the string "--" (double-hyphen)
4852 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004853 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004854 *
4855 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4856 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004857static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004858xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4859 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004860 int q, ql;
4861 int r, rl;
4862 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004863 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004864 int inputid;
4865
4866 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004867
Owen Taylor3473f882001-02-23 17:55:21 +00004868 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004869 len = 0;
4870 size = XML_PARSER_BUFFER_SIZE;
4871 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4872 if (buf == NULL) {
4873 xmlErrMemory(ctxt, NULL);
4874 return;
4875 }
Owen Taylor3473f882001-02-23 17:55:21 +00004876 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004877 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004878 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004879 if (q == 0)
4880 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004881 if (!IS_CHAR(q)) {
4882 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4883 "xmlParseComment: invalid xmlChar value %d\n",
4884 q);
4885 xmlFree (buf);
4886 return;
4887 }
Owen Taylor3473f882001-02-23 17:55:21 +00004888 NEXTL(ql);
4889 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004890 if (r == 0)
4891 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004892 if (!IS_CHAR(r)) {
4893 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4894 "xmlParseComment: invalid xmlChar value %d\n",
4895 q);
4896 xmlFree (buf);
4897 return;
4898 }
Owen Taylor3473f882001-02-23 17:55:21 +00004899 NEXTL(rl);
4900 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004901 if (cur == 0)
4902 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004903 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004904 ((cur != '>') ||
4905 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004906 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004907 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004908 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004909 if ((len > XML_MAX_TEXT_LENGTH) &&
4910 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4911 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4912 "Comment too big found", NULL);
4913 xmlFree (buf);
4914 return;
4915 }
Owen Taylor3473f882001-02-23 17:55:21 +00004916 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004917 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004918 size_t new_size;
4919
4920 new_size = size * 2;
4921 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004922 if (new_buf == NULL) {
4923 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004924 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004925 return;
4926 }
William M. Bracka3215c72004-07-31 16:24:01 +00004927 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004928 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004929 }
4930 COPY_BUF(ql,buf,len,q);
4931 q = r;
4932 ql = rl;
4933 r = cur;
4934 rl = l;
4935
4936 count++;
4937 if (count > 50) {
4938 GROW;
4939 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004940 if (ctxt->instate == XML_PARSER_EOF) {
4941 xmlFree(buf);
4942 return;
4943 }
Owen Taylor3473f882001-02-23 17:55:21 +00004944 }
4945 NEXTL(l);
4946 cur = CUR_CHAR(l);
4947 if (cur == 0) {
4948 SHRINK;
4949 GROW;
4950 cur = CUR_CHAR(l);
4951 }
4952 }
4953 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004954 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004955 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004956 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004957 } else if (!IS_CHAR(cur)) {
4958 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4959 "xmlParseComment: invalid xmlChar value %d\n",
4960 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004961 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004962 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004963 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4964 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004965 }
4966 NEXT;
4967 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4968 (!ctxt->disableSAX))
4969 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004970 }
Daniel Veillardda629342007-08-01 07:49:06 +00004971 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004972 return;
4973not_terminated:
4974 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4975 "Comment not terminated\n", NULL);
4976 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004977 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004978}
Daniel Veillardda629342007-08-01 07:49:06 +00004979
Daniel Veillard4c778d82005-01-23 17:37:44 +00004980/**
4981 * xmlParseComment:
4982 * @ctxt: an XML parser context
4983 *
4984 * Skip an XML (SGML) comment <!-- .... -->
4985 * The spec says that "For compatibility, the string "--" (double-hyphen)
4986 * must not occur within comments. "
4987 *
4988 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4989 */
4990void
4991xmlParseComment(xmlParserCtxtPtr ctxt) {
4992 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004993 size_t size = XML_PARSER_BUFFER_SIZE;
4994 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004995 xmlParserInputState state;
4996 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004997 size_t nbchar = 0;
4998 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004999 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005000
5001 /*
5002 * Check that there is a comment right here.
5003 */
5004 if ((RAW != '<') || (NXT(1) != '!') ||
5005 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005006 state = ctxt->instate;
5007 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00005008 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005009 SKIP(4);
5010 SHRINK;
5011 GROW;
5012
5013 /*
5014 * Accelerated common case where input don't need to be
5015 * modified before passing it to the handler.
5016 */
5017 in = ctxt->input->cur;
5018 do {
5019 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005020 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00005021 ctxt->input->line++; ctxt->input->col = 1;
5022 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005023 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005024 }
5025get_more:
5026 ccol = ctxt->input->col;
5027 while (((*in > '-') && (*in <= 0x7F)) ||
5028 ((*in >= 0x20) && (*in < '-')) ||
5029 (*in == 0x09)) {
5030 in++;
5031 ccol++;
5032 }
5033 ctxt->input->col = ccol;
5034 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005035 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00005036 ctxt->input->line++; ctxt->input->col = 1;
5037 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005038 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005039 goto get_more;
5040 }
5041 nbchar = in - ctxt->input->cur;
5042 /*
5043 * save current set of data
5044 */
5045 if (nbchar > 0) {
5046 if ((ctxt->sax != NULL) &&
5047 (ctxt->sax->comment != NULL)) {
5048 if (buf == NULL) {
5049 if ((*in == '-') && (in[1] == '-'))
5050 size = nbchar + 1;
5051 else
5052 size = XML_PARSER_BUFFER_SIZE + nbchar;
5053 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5054 if (buf == NULL) {
5055 xmlErrMemory(ctxt, NULL);
5056 ctxt->instate = state;
5057 return;
5058 }
5059 len = 0;
5060 } else if (len + nbchar + 1 >= size) {
5061 xmlChar *new_buf;
5062 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5063 new_buf = (xmlChar *) xmlRealloc(buf,
5064 size * sizeof(xmlChar));
5065 if (new_buf == NULL) {
5066 xmlFree (buf);
5067 xmlErrMemory(ctxt, NULL);
5068 ctxt->instate = state;
5069 return;
5070 }
5071 buf = new_buf;
5072 }
5073 memcpy(&buf[len], ctxt->input->cur, nbchar);
5074 len += nbchar;
5075 buf[len] = 0;
5076 }
5077 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08005078 if ((len > XML_MAX_TEXT_LENGTH) &&
5079 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5080 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5081 "Comment too big found", NULL);
5082 xmlFree (buf);
5083 return;
5084 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005085 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00005086 if (*in == 0xA) {
5087 in++;
5088 ctxt->input->line++; ctxt->input->col = 1;
5089 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005090 if (*in == 0xD) {
5091 in++;
5092 if (*in == 0xA) {
5093 ctxt->input->cur = in;
5094 in++;
5095 ctxt->input->line++; ctxt->input->col = 1;
5096 continue; /* while */
5097 }
5098 in--;
5099 }
5100 SHRINK;
5101 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005102 if (ctxt->instate == XML_PARSER_EOF) {
5103 xmlFree(buf);
5104 return;
5105 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005106 in = ctxt->input->cur;
5107 if (*in == '-') {
5108 if (in[1] == '-') {
5109 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00005110 if (ctxt->input->id != inputid) {
5111 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5112 "comment doesn't start and stop in the same entity\n");
5113 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005114 SKIP(3);
5115 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5116 (!ctxt->disableSAX)) {
5117 if (buf != NULL)
5118 ctxt->sax->comment(ctxt->userData, buf);
5119 else
5120 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5121 }
5122 if (buf != NULL)
5123 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08005124 if (ctxt->instate != XML_PARSER_EOF)
5125 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005126 return;
5127 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005128 if (buf != NULL) {
5129 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5130 "Double hyphen within comment: "
5131 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005132 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005133 } else
5134 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5135 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005136 in++;
5137 ctxt->input->col++;
5138 }
5139 in++;
5140 ctxt->input->col++;
5141 goto get_more;
5142 }
5143 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5144 xmlParseCommentComplex(ctxt, buf, len, size);
5145 ctxt->instate = state;
5146 return;
5147}
5148
Owen Taylor3473f882001-02-23 17:55:21 +00005149
5150/**
5151 * xmlParsePITarget:
5152 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005153 *
Owen Taylor3473f882001-02-23 17:55:21 +00005154 * parse the name of a PI
5155 *
5156 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5157 *
5158 * Returns the PITarget name or NULL
5159 */
5160
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005161const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005162xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005163 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005164
5165 name = xmlParseName(ctxt);
5166 if ((name != NULL) &&
5167 ((name[0] == 'x') || (name[0] == 'X')) &&
5168 ((name[1] == 'm') || (name[1] == 'M')) &&
5169 ((name[2] == 'l') || (name[2] == 'L'))) {
5170 int i;
5171 if ((name[0] == 'x') && (name[1] == 'm') &&
5172 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005173 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005174 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005175 return(name);
5176 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005177 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005178 return(name);
5179 }
5180 for (i = 0;;i++) {
5181 if (xmlW3CPIs[i] == NULL) break;
5182 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5183 return(name);
5184 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005185 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5186 "xmlParsePITarget: invalid name prefix 'xml'\n",
5187 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005188 }
Daniel Veillard37334572008-07-31 08:20:02 +00005189 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005190 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005191 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005192 }
Owen Taylor3473f882001-02-23 17:55:21 +00005193 return(name);
5194}
5195
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005196#ifdef LIBXML_CATALOG_ENABLED
5197/**
5198 * xmlParseCatalogPI:
5199 * @ctxt: an XML parser context
5200 * @catalog: the PI value string
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005201 *
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005202 * parse an XML Catalog Processing Instruction.
5203 *
5204 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5205 *
5206 * Occurs only if allowed by the user and if happening in the Misc
5207 * part of the document before any doctype informations
5208 * This will add the given catalog to the parsing context in order
5209 * to be used if there is a resolution need further down in the document
5210 */
5211
5212static void
5213xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5214 xmlChar *URL = NULL;
5215 const xmlChar *tmp, *base;
5216 xmlChar marker;
5217
5218 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00005219 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005220 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5221 goto error;
5222 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00005223 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005224 if (*tmp != '=') {
5225 return;
5226 }
5227 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00005228 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005229 marker = *tmp;
5230 if ((marker != '\'') && (marker != '"'))
5231 goto error;
5232 tmp++;
5233 base = tmp;
5234 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5235 if (*tmp == 0)
5236 goto error;
5237 URL = xmlStrndup(base, tmp - base);
5238 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00005239 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005240 if (*tmp != 0)
5241 goto error;
5242
5243 if (URL != NULL) {
5244 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5245 xmlFree(URL);
5246 }
5247 return;
5248
5249error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00005250 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5251 "Catalog PI syntax error: %s\n",
5252 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005253 if (URL != NULL)
5254 xmlFree(URL);
5255}
5256#endif
5257
Owen Taylor3473f882001-02-23 17:55:21 +00005258/**
5259 * xmlParsePI:
5260 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005261 *
Owen Taylor3473f882001-02-23 17:55:21 +00005262 * parse an XML Processing Instruction.
5263 *
5264 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5265 *
5266 * The processing is transfered to SAX once parsed.
5267 */
5268
5269void
5270xmlParsePI(xmlParserCtxtPtr ctxt) {
5271 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005272 size_t len = 0;
5273 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005274 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005275 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005276 xmlParserInputState state;
5277 int count = 0;
5278
5279 if ((RAW == '<') && (NXT(1) == '?')) {
5280 xmlParserInputPtr input = ctxt->input;
5281 state = ctxt->instate;
5282 ctxt->instate = XML_PARSER_PI;
5283 /*
5284 * this is a Processing Instruction.
5285 */
5286 SKIP(2);
5287 SHRINK;
5288
5289 /*
5290 * Parse the target name and check for special support like
5291 * namespace.
5292 */
5293 target = xmlParsePITarget(ctxt);
5294 if (target != NULL) {
5295 if ((RAW == '?') && (NXT(1) == '>')) {
5296 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005297 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5298 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005299 }
5300 SKIP(2);
5301
5302 /*
5303 * SAX: PI detected.
5304 */
5305 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5306 (ctxt->sax->processingInstruction != NULL))
5307 ctxt->sax->processingInstruction(ctxt->userData,
5308 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005309 if (ctxt->instate != XML_PARSER_EOF)
5310 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005311 return;
5312 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005313 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005314 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005315 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005316 ctxt->instate = state;
5317 return;
5318 }
5319 cur = CUR;
5320 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005321 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5322 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005323 }
5324 SKIP_BLANKS;
5325 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005326 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005327 ((cur != '?') || (NXT(1) != '>'))) {
5328 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005329 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005330 size_t new_size = size * 2;
5331 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005332 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005333 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005334 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005335 ctxt->instate = state;
5336 return;
5337 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005338 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005339 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005340 }
5341 count++;
5342 if (count > 50) {
5343 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005344 if (ctxt->instate == XML_PARSER_EOF) {
5345 xmlFree(buf);
5346 return;
5347 }
Owen Taylor3473f882001-02-23 17:55:21 +00005348 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005349 if ((len > XML_MAX_TEXT_LENGTH) &&
5350 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5351 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5352 "PI %s too big found", target);
5353 xmlFree(buf);
5354 ctxt->instate = state;
5355 return;
5356 }
Owen Taylor3473f882001-02-23 17:55:21 +00005357 }
5358 COPY_BUF(l,buf,len,cur);
5359 NEXTL(l);
5360 cur = CUR_CHAR(l);
5361 if (cur == 0) {
5362 SHRINK;
5363 GROW;
5364 cur = CUR_CHAR(l);
5365 }
5366 }
Daniel Veillard51304812012-07-19 20:34:26 +08005367 if ((len > XML_MAX_TEXT_LENGTH) &&
5368 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5369 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5370 "PI %s too big found", target);
5371 xmlFree(buf);
5372 ctxt->instate = state;
5373 return;
5374 }
Owen Taylor3473f882001-02-23 17:55:21 +00005375 buf[len] = 0;
5376 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005377 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5378 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005379 } else {
5380 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005381 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5382 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005383 }
5384 SKIP(2);
5385
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005386#ifdef LIBXML_CATALOG_ENABLED
5387 if (((state == XML_PARSER_MISC) ||
5388 (state == XML_PARSER_START)) &&
5389 (xmlStrEqual(target, XML_CATALOG_PI))) {
5390 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5391 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5392 (allow == XML_CATA_ALLOW_ALL))
5393 xmlParseCatalogPI(ctxt, buf);
5394 }
5395#endif
5396
5397
Owen Taylor3473f882001-02-23 17:55:21 +00005398 /*
5399 * SAX: PI detected.
5400 */
5401 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5402 (ctxt->sax->processingInstruction != NULL))
5403 ctxt->sax->processingInstruction(ctxt->userData,
5404 target, buf);
5405 }
5406 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005407 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005408 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005409 }
Chris Evans77404b82011-12-14 16:18:25 +08005410 if (ctxt->instate != XML_PARSER_EOF)
5411 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005412 }
5413}
5414
5415/**
5416 * xmlParseNotationDecl:
5417 * @ctxt: an XML parser context
5418 *
5419 * parse a notation declaration
5420 *
5421 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5422 *
5423 * Hence there is actually 3 choices:
5424 * 'PUBLIC' S PubidLiteral
5425 * 'PUBLIC' S PubidLiteral S SystemLiteral
5426 * and 'SYSTEM' S SystemLiteral
5427 *
5428 * See the NOTE on xmlParseExternalID().
5429 */
5430
5431void
5432xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005433 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005434 xmlChar *Pubid;
5435 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005436
Daniel Veillarda07050d2003-10-19 14:46:32 +00005437 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005438 xmlParserInputPtr input = ctxt->input;
5439 SHRINK;
5440 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005441 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005442 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5443 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005444 return;
5445 }
5446 SKIP_BLANKS;
5447
Daniel Veillard76d66f42001-05-16 21:05:17 +00005448 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005449 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005450 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005451 return;
5452 }
William M. Brack76e95df2003-10-18 16:20:14 +00005453 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005454 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005455 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005456 return;
5457 }
Daniel Veillard37334572008-07-31 08:20:02 +00005458 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005459 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005460 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005461 name, NULL, NULL);
5462 }
Owen Taylor3473f882001-02-23 17:55:21 +00005463 SKIP_BLANKS;
5464
5465 /*
5466 * Parse the IDs.
5467 */
5468 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5469 SKIP_BLANKS;
5470
5471 if (RAW == '>') {
5472 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005473 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5474 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005475 }
5476 NEXT;
5477 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5478 (ctxt->sax->notationDecl != NULL))
5479 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5480 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005481 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005482 }
Owen Taylor3473f882001-02-23 17:55:21 +00005483 if (Systemid != NULL) xmlFree(Systemid);
5484 if (Pubid != NULL) xmlFree(Pubid);
5485 }
5486}
5487
5488/**
5489 * xmlParseEntityDecl:
5490 * @ctxt: an XML parser context
5491 *
5492 * parse <!ENTITY declarations
5493 *
5494 * [70] EntityDecl ::= GEDecl | PEDecl
5495 *
5496 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5497 *
5498 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5499 *
5500 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5501 *
5502 * [74] PEDef ::= EntityValue | ExternalID
5503 *
5504 * [76] NDataDecl ::= S 'NDATA' S Name
5505 *
5506 * [ VC: Notation Declared ]
5507 * The Name must match the declared name of a notation.
5508 */
5509
5510void
5511xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005512 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005513 xmlChar *value = NULL;
5514 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005515 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005516 int isParameter = 0;
5517 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005518 int skipped;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005519
Daniel Veillard4c778d82005-01-23 17:37:44 +00005520 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005521 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005522 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005523 SHRINK;
5524 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005525 skipped = SKIP_BLANKS;
5526 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005527 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5528 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005529 }
Owen Taylor3473f882001-02-23 17:55:21 +00005530
5531 if (RAW == '%') {
5532 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005533 skipped = SKIP_BLANKS;
5534 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005535 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
David Kilzer4472c3a2016-05-13 15:13:17 +08005536 "Space required after '%%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005537 }
Owen Taylor3473f882001-02-23 17:55:21 +00005538 isParameter = 1;
5539 }
5540
Daniel Veillard76d66f42001-05-16 21:05:17 +00005541 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005542 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005543 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5544 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005545 return;
5546 }
Daniel Veillard37334572008-07-31 08:20:02 +00005547 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005548 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005549 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005550 name, NULL, NULL);
5551 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005552 skipped = SKIP_BLANKS;
5553 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005554 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5555 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005556 }
Owen Taylor3473f882001-02-23 17:55:21 +00005557
Daniel Veillardf5582f12002-06-11 10:08:16 +00005558 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005559 /*
5560 * handle the various case of definitions...
5561 */
5562 if (isParameter) {
5563 if ((RAW == '"') || (RAW == '\'')) {
5564 value = xmlParseEntityValue(ctxt, &orig);
5565 if (value) {
5566 if ((ctxt->sax != NULL) &&
5567 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5568 ctxt->sax->entityDecl(ctxt->userData, name,
5569 XML_INTERNAL_PARAMETER_ENTITY,
5570 NULL, NULL, value);
5571 }
5572 } else {
5573 URI = xmlParseExternalID(ctxt, &literal, 1);
5574 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005575 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005576 }
5577 if (URI) {
5578 xmlURIPtr uri;
5579
5580 uri = xmlParseURI((const char *) URI);
5581 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005582 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5583 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005584 /*
5585 * This really ought to be a well formedness error
5586 * but the XML Core WG decided otherwise c.f. issue
5587 * E26 of the XML erratas.
5588 */
Owen Taylor3473f882001-02-23 17:55:21 +00005589 } else {
5590 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005591 /*
5592 * Okay this is foolish to block those but not
5593 * invalid URIs.
5594 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005595 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005596 } else {
5597 if ((ctxt->sax != NULL) &&
5598 (!ctxt->disableSAX) &&
5599 (ctxt->sax->entityDecl != NULL))
5600 ctxt->sax->entityDecl(ctxt->userData, name,
5601 XML_EXTERNAL_PARAMETER_ENTITY,
5602 literal, URI, NULL);
5603 }
5604 xmlFreeURI(uri);
5605 }
5606 }
5607 }
5608 } else {
5609 if ((RAW == '"') || (RAW == '\'')) {
5610 value = xmlParseEntityValue(ctxt, &orig);
5611 if ((ctxt->sax != NULL) &&
5612 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5613 ctxt->sax->entityDecl(ctxt->userData, name,
5614 XML_INTERNAL_GENERAL_ENTITY,
5615 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005616 /*
5617 * For expat compatibility in SAX mode.
5618 */
5619 if ((ctxt->myDoc == NULL) ||
5620 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5621 if (ctxt->myDoc == NULL) {
5622 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005623 if (ctxt->myDoc == NULL) {
5624 xmlErrMemory(ctxt, "New Doc failed");
5625 return;
5626 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005627 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005628 }
5629 if (ctxt->myDoc->intSubset == NULL)
5630 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5631 BAD_CAST "fake", NULL, NULL);
5632
Daniel Veillard1af9a412003-08-20 22:54:39 +00005633 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5634 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005635 }
Owen Taylor3473f882001-02-23 17:55:21 +00005636 } else {
5637 URI = xmlParseExternalID(ctxt, &literal, 1);
5638 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005639 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005640 }
5641 if (URI) {
5642 xmlURIPtr uri;
5643
5644 uri = xmlParseURI((const char *)URI);
5645 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005646 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5647 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005648 /*
5649 * This really ought to be a well formedness error
5650 * but the XML Core WG decided otherwise c.f. issue
5651 * E26 of the XML erratas.
5652 */
Owen Taylor3473f882001-02-23 17:55:21 +00005653 } else {
5654 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005655 /*
5656 * Okay this is foolish to block those but not
5657 * invalid URIs.
5658 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005659 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005660 }
5661 xmlFreeURI(uri);
5662 }
5663 }
William M. Brack76e95df2003-10-18 16:20:14 +00005664 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005665 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5666 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005667 }
5668 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005669 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005670 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005671 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005672 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5673 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005674 }
5675 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005676 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005677 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5678 (ctxt->sax->unparsedEntityDecl != NULL))
5679 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5680 literal, URI, ndata);
5681 } else {
5682 if ((ctxt->sax != NULL) &&
5683 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5684 ctxt->sax->entityDecl(ctxt->userData, name,
5685 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5686 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005687 /*
5688 * For expat compatibility in SAX mode.
5689 * assuming the entity repalcement was asked for
5690 */
5691 if ((ctxt->replaceEntities != 0) &&
5692 ((ctxt->myDoc == NULL) ||
5693 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5694 if (ctxt->myDoc == NULL) {
5695 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005696 if (ctxt->myDoc == NULL) {
5697 xmlErrMemory(ctxt, "New Doc failed");
5698 return;
5699 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005700 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005701 }
5702
5703 if (ctxt->myDoc->intSubset == NULL)
5704 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5705 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005706 xmlSAX2EntityDecl(ctxt, name,
5707 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5708 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005709 }
Owen Taylor3473f882001-02-23 17:55:21 +00005710 }
5711 }
5712 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005713 if (ctxt->instate == XML_PARSER_EOF)
5714 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005715 SKIP_BLANKS;
5716 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005717 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005718 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005719 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005720 } else {
5721 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005722 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5723 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005724 }
5725 NEXT;
5726 }
5727 if (orig != NULL) {
5728 /*
5729 * Ugly mechanism to save the raw entity value.
5730 */
5731 xmlEntityPtr cur = NULL;
5732
5733 if (isParameter) {
5734 if ((ctxt->sax != NULL) &&
5735 (ctxt->sax->getParameterEntity != NULL))
5736 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5737 } else {
5738 if ((ctxt->sax != NULL) &&
5739 (ctxt->sax->getEntity != NULL))
5740 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005741 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005742 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005743 }
Owen Taylor3473f882001-02-23 17:55:21 +00005744 }
5745 if (cur != NULL) {
5746 if (cur->orig != NULL)
5747 xmlFree(orig);
5748 else
5749 cur->orig = orig;
5750 } else
5751 xmlFree(orig);
5752 }
Owen Taylor3473f882001-02-23 17:55:21 +00005753 if (value != NULL) xmlFree(value);
5754 if (URI != NULL) xmlFree(URI);
5755 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005756 }
5757}
5758
5759/**
5760 * xmlParseDefaultDecl:
5761 * @ctxt: an XML parser context
5762 * @value: Receive a possible fixed default value for the attribute
5763 *
5764 * Parse an attribute default declaration
5765 *
5766 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5767 *
5768 * [ VC: Required Attribute ]
5769 * if the default declaration is the keyword #REQUIRED, then the
5770 * attribute must be specified for all elements of the type in the
5771 * attribute-list declaration.
5772 *
5773 * [ VC: Attribute Default Legal ]
5774 * The declared default value must meet the lexical constraints of
5775 * the declared attribute type c.f. xmlValidateAttributeDecl()
5776 *
5777 * [ VC: Fixed Attribute Default ]
5778 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005779 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005780 *
5781 * [ WFC: No < in Attribute Values ]
5782 * handled in xmlParseAttValue()
5783 *
5784 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005785 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005786 */
5787
5788int
5789xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5790 int val;
5791 xmlChar *ret;
5792
5793 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005794 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005795 SKIP(9);
5796 return(XML_ATTRIBUTE_REQUIRED);
5797 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005798 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005799 SKIP(8);
5800 return(XML_ATTRIBUTE_IMPLIED);
5801 }
5802 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005803 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005804 SKIP(6);
5805 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005806 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005807 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5808 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005809 }
5810 SKIP_BLANKS;
5811 }
5812 ret = xmlParseAttValue(ctxt);
5813 ctxt->instate = XML_PARSER_DTD;
5814 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005815 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005816 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005817 } else
5818 *value = ret;
5819 return(val);
5820}
5821
5822/**
5823 * xmlParseNotationType:
5824 * @ctxt: an XML parser context
5825 *
5826 * parse an Notation attribute type.
5827 *
5828 * Note: the leading 'NOTATION' S part has already being parsed...
5829 *
5830 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5831 *
5832 * [ VC: Notation Attributes ]
5833 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005834 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005835 *
5836 * Returns: the notation attribute tree built while parsing
5837 */
5838
5839xmlEnumerationPtr
5840xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005841 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005842 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005843
5844 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005845 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005846 return(NULL);
5847 }
5848 SHRINK;
5849 do {
5850 NEXT;
5851 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005852 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005853 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005854 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5855 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005856 xmlFreeEnumeration(ret);
5857 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005858 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005859 tmp = ret;
5860 while (tmp != NULL) {
5861 if (xmlStrEqual(name, tmp->name)) {
5862 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5863 "standalone: attribute notation value token %s duplicated\n",
5864 name, NULL);
5865 if (!xmlDictOwns(ctxt->dict, name))
5866 xmlFree((xmlChar *) name);
5867 break;
5868 }
5869 tmp = tmp->next;
5870 }
5871 if (tmp == NULL) {
5872 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005873 if (cur == NULL) {
5874 xmlFreeEnumeration(ret);
5875 return(NULL);
5876 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005877 if (last == NULL) ret = last = cur;
5878 else {
5879 last->next = cur;
5880 last = cur;
5881 }
Owen Taylor3473f882001-02-23 17:55:21 +00005882 }
5883 SKIP_BLANKS;
5884 } while (RAW == '|');
5885 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005886 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005887 xmlFreeEnumeration(ret);
5888 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005889 }
5890 NEXT;
5891 return(ret);
5892}
5893
5894/**
5895 * xmlParseEnumerationType:
5896 * @ctxt: an XML parser context
5897 *
5898 * parse an Enumeration attribute type.
5899 *
5900 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5901 *
5902 * [ VC: Enumeration ]
5903 * Values of this type must match one of the Nmtoken tokens in
5904 * the declaration
5905 *
5906 * Returns: the enumeration attribute tree built while parsing
5907 */
5908
5909xmlEnumerationPtr
5910xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5911 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005912 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005913
5914 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005915 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005916 return(NULL);
5917 }
5918 SHRINK;
5919 do {
5920 NEXT;
5921 SKIP_BLANKS;
5922 name = xmlParseNmtoken(ctxt);
5923 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005924 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005925 return(ret);
5926 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005927 tmp = ret;
5928 while (tmp != NULL) {
5929 if (xmlStrEqual(name, tmp->name)) {
5930 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5931 "standalone: attribute enumeration value token %s duplicated\n",
5932 name, NULL);
5933 if (!xmlDictOwns(ctxt->dict, name))
5934 xmlFree(name);
5935 break;
5936 }
5937 tmp = tmp->next;
5938 }
5939 if (tmp == NULL) {
5940 cur = xmlCreateEnumeration(name);
5941 if (!xmlDictOwns(ctxt->dict, name))
5942 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005943 if (cur == NULL) {
5944 xmlFreeEnumeration(ret);
5945 return(NULL);
5946 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005947 if (last == NULL) ret = last = cur;
5948 else {
5949 last->next = cur;
5950 last = cur;
5951 }
Owen Taylor3473f882001-02-23 17:55:21 +00005952 }
5953 SKIP_BLANKS;
5954 } while (RAW == '|');
5955 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005956 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005957 return(ret);
5958 }
5959 NEXT;
5960 return(ret);
5961}
5962
5963/**
5964 * xmlParseEnumeratedType:
5965 * @ctxt: an XML parser context
5966 * @tree: the enumeration tree built while parsing
5967 *
5968 * parse an Enumerated attribute type.
5969 *
5970 * [57] EnumeratedType ::= NotationType | Enumeration
5971 *
5972 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5973 *
5974 *
5975 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5976 */
5977
5978int
5979xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005980 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005981 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005982 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005983 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5984 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005985 return(0);
5986 }
5987 SKIP_BLANKS;
5988 *tree = xmlParseNotationType(ctxt);
5989 if (*tree == NULL) return(0);
5990 return(XML_ATTRIBUTE_NOTATION);
5991 }
5992 *tree = xmlParseEnumerationType(ctxt);
5993 if (*tree == NULL) return(0);
5994 return(XML_ATTRIBUTE_ENUMERATION);
5995}
5996
5997/**
5998 * xmlParseAttributeType:
5999 * @ctxt: an XML parser context
6000 * @tree: the enumeration tree built while parsing
6001 *
6002 * parse the Attribute list def for an element
6003 *
6004 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6005 *
6006 * [55] StringType ::= 'CDATA'
6007 *
6008 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6009 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6010 *
6011 * Validity constraints for attribute values syntax are checked in
6012 * xmlValidateAttributeValue()
6013 *
6014 * [ VC: ID ]
6015 * Values of type ID must match the Name production. A name must not
6016 * appear more than once in an XML document as a value of this type;
6017 * i.e., ID values must uniquely identify the elements which bear them.
6018 *
6019 * [ VC: One ID per Element Type ]
6020 * No element type may have more than one ID attribute specified.
6021 *
6022 * [ VC: ID Attribute Default ]
6023 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6024 *
6025 * [ VC: IDREF ]
6026 * Values of type IDREF must match the Name production, and values
6027 * of type IDREFS must match Names; each IDREF Name must match the value
6028 * of an ID attribute on some element in the XML document; i.e. IDREF
6029 * values must match the value of some ID attribute.
6030 *
6031 * [ VC: Entity Name ]
6032 * Values of type ENTITY must match the Name production, values
6033 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006034 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00006035 *
6036 * [ VC: Name Token ]
6037 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006038 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00006039 *
6040 * Returns the attribute type
6041 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006042int
Owen Taylor3473f882001-02-23 17:55:21 +00006043xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6044 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006045 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006046 SKIP(5);
6047 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006048 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006049 SKIP(6);
6050 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006051 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006052 SKIP(5);
6053 return(XML_ATTRIBUTE_IDREF);
6054 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6055 SKIP(2);
6056 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006057 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006058 SKIP(6);
6059 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006060 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006061 SKIP(8);
6062 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006063 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006064 SKIP(8);
6065 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006066 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006067 SKIP(7);
6068 return(XML_ATTRIBUTE_NMTOKEN);
6069 }
6070 return(xmlParseEnumeratedType(ctxt, tree));
6071}
6072
6073/**
6074 * xmlParseAttributeListDecl:
6075 * @ctxt: an XML parser context
6076 *
6077 * : parse the Attribute list def for an element
6078 *
6079 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6080 *
6081 * [53] AttDef ::= S Name S AttType S DefaultDecl
6082 *
6083 */
6084void
6085xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006086 const xmlChar *elemName;
6087 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00006088 xmlEnumerationPtr tree;
6089
Daniel Veillarda07050d2003-10-19 14:46:32 +00006090 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006091 xmlParserInputPtr input = ctxt->input;
6092
6093 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006094 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006095 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006096 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006097 }
6098 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006099 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006100 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006101 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6102 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006103 return;
6104 }
6105 SKIP_BLANKS;
6106 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006107 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006108 const xmlChar *check = CUR_PTR;
6109 int type;
6110 int def;
6111 xmlChar *defaultValue = NULL;
6112
6113 GROW;
6114 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006115 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006116 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006117 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6118 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006119 break;
6120 }
6121 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006122 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006123 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006124 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006125 break;
6126 }
6127 SKIP_BLANKS;
6128
6129 type = xmlParseAttributeType(ctxt, &tree);
6130 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006131 break;
6132 }
6133
6134 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006135 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006136 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6137 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006138 if (tree != NULL)
6139 xmlFreeEnumeration(tree);
6140 break;
6141 }
6142 SKIP_BLANKS;
6143
6144 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6145 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006146 if (defaultValue != NULL)
6147 xmlFree(defaultValue);
6148 if (tree != NULL)
6149 xmlFreeEnumeration(tree);
6150 break;
6151 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006152 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6153 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006154
6155 GROW;
6156 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006157 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006158 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006159 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006160 if (defaultValue != NULL)
6161 xmlFree(defaultValue);
6162 if (tree != NULL)
6163 xmlFreeEnumeration(tree);
6164 break;
6165 }
6166 SKIP_BLANKS;
6167 }
6168 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006169 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6170 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006171 if (defaultValue != NULL)
6172 xmlFree(defaultValue);
6173 if (tree != NULL)
6174 xmlFreeEnumeration(tree);
6175 break;
6176 }
6177 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6178 (ctxt->sax->attributeDecl != NULL))
6179 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6180 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006181 else if (tree != NULL)
6182 xmlFreeEnumeration(tree);
6183
6184 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006185 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006186 (def != XML_ATTRIBUTE_REQUIRED)) {
6187 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6188 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006189 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006190 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6191 }
Owen Taylor3473f882001-02-23 17:55:21 +00006192 if (defaultValue != NULL)
6193 xmlFree(defaultValue);
6194 GROW;
6195 }
6196 if (RAW == '>') {
6197 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006198 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6199 "Attribute list declaration doesn't start and stop in the same entity\n",
6200 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006201 }
6202 NEXT;
6203 }
Owen Taylor3473f882001-02-23 17:55:21 +00006204 }
6205}
6206
6207/**
6208 * xmlParseElementMixedContentDecl:
6209 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006210 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006211 *
6212 * parse the declaration for a Mixed Element content
6213 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006214 *
Owen Taylor3473f882001-02-23 17:55:21 +00006215 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6216 * '(' S? '#PCDATA' S? ')'
6217 *
6218 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6219 *
6220 * [ VC: No Duplicate Types ]
6221 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006222 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006223 *
6224 * returns: the list of the xmlElementContentPtr describing the element choices
6225 */
6226xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006227xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006228 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006229 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006230
6231 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006232 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006233 SKIP(7);
6234 SKIP_BLANKS;
6235 SHRINK;
6236 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006237 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006238 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6239"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006240 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006241 }
Owen Taylor3473f882001-02-23 17:55:21 +00006242 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006243 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006244 if (ret == NULL)
6245 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006246 if (RAW == '*') {
6247 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6248 NEXT;
6249 }
6250 return(ret);
6251 }
6252 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006253 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006254 if (ret == NULL) return(NULL);
6255 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006256 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006257 NEXT;
6258 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006259 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006260 if (ret == NULL) return(NULL);
6261 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006262 if (cur != NULL)
6263 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006264 cur = ret;
6265 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006266 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006267 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006268 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006269 if (n->c1 != NULL)
6270 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006271 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006272 if (n != NULL)
6273 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006274 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006275 }
6276 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006277 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006278 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006279 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006280 "xmlParseElementMixedContentDecl : Name expected\n");
Nick Wellnhofer8627e4e2017-05-23 18:11:08 +02006281 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006282 return(NULL);
6283 }
6284 SKIP_BLANKS;
6285 GROW;
6286 }
6287 if ((RAW == ')') && (NXT(1) == '*')) {
6288 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006289 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006290 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006291 if (cur->c2 != NULL)
6292 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006293 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006294 if (ret != NULL)
6295 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006296 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006297 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6298"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006299 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006300 }
Owen Taylor3473f882001-02-23 17:55:21 +00006301 SKIP(2);
6302 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006303 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006304 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006305 return(NULL);
6306 }
6307
6308 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006309 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006310 }
6311 return(ret);
6312}
6313
6314/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006315 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006316 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006317 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006318 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006319 *
6320 * parse the declaration for a Mixed Element content
6321 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006322 *
Owen Taylor3473f882001-02-23 17:55:21 +00006323 *
6324 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6325 *
6326 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6327 *
6328 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6329 *
6330 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6331 *
6332 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6333 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006334 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006335 * opening or closing parentheses in a choice, seq, or Mixed
6336 * construct is contained in the replacement text for a parameter
6337 * entity, both must be contained in the same replacement text. For
6338 * interoperability, if a parameter-entity reference appears in a
6339 * choice, seq, or Mixed construct, its replacement text should not
6340 * be empty, and neither the first nor last non-blank character of
6341 * the replacement text should be a connector (| or ,).
6342 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006343 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006344 * hierarchy.
6345 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006346static xmlElementContentPtr
6347xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6348 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006349 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006350 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006351 xmlChar type = 0;
6352
Daniel Veillard489f9672009-08-10 16:49:30 +02006353 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6354 (depth > 2048)) {
6355 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6356"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6357 depth);
6358 return(NULL);
6359 }
Owen Taylor3473f882001-02-23 17:55:21 +00006360 SKIP_BLANKS;
6361 GROW;
6362 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006363 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006364
Owen Taylor3473f882001-02-23 17:55:21 +00006365 /* Recurse on first child */
6366 NEXT;
6367 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006368 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6369 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006370 SKIP_BLANKS;
6371 GROW;
6372 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006373 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006374 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006375 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006376 return(NULL);
6377 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006378 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006379 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006380 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006381 return(NULL);
6382 }
Owen Taylor3473f882001-02-23 17:55:21 +00006383 GROW;
6384 if (RAW == '?') {
6385 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6386 NEXT;
6387 } else if (RAW == '*') {
6388 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6389 NEXT;
6390 } else if (RAW == '+') {
6391 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6392 NEXT;
6393 } else {
6394 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6395 }
Owen Taylor3473f882001-02-23 17:55:21 +00006396 GROW;
6397 }
6398 SKIP_BLANKS;
6399 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006400 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006401 /*
6402 * Each loop we parse one separator and one element.
6403 */
6404 if (RAW == ',') {
6405 if (type == 0) type = CUR;
6406
6407 /*
6408 * Detect "Name | Name , Name" error
6409 */
6410 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006411 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006412 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006413 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006414 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006415 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006416 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006417 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006418 return(NULL);
6419 }
6420 NEXT;
6421
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006422 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006423 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006424 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006425 xmlFreeDocElementContent(ctxt->myDoc, last);
6426 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006427 return(NULL);
6428 }
6429 if (last == NULL) {
6430 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006431 if (ret != NULL)
6432 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006433 ret = cur = op;
6434 } else {
6435 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006436 if (op != NULL)
6437 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006438 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006439 if (last != NULL)
6440 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006441 cur =op;
6442 last = NULL;
6443 }
6444 } else if (RAW == '|') {
6445 if (type == 0) type = CUR;
6446
6447 /*
6448 * Detect "Name , Name | Name" error
6449 */
6450 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006451 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006452 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006453 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006454 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006455 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006456 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006457 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006458 return(NULL);
6459 }
6460 NEXT;
6461
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006462 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006463 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006464 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006465 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006466 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006467 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006468 return(NULL);
6469 }
6470 if (last == NULL) {
6471 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006472 if (ret != NULL)
6473 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006474 ret = cur = op;
6475 } else {
6476 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006477 if (op != NULL)
6478 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006479 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006480 if (last != NULL)
6481 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006482 cur =op;
6483 last = NULL;
6484 }
6485 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006486 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006487 if ((last != NULL) && (last != ret))
6488 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006489 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006490 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006491 return(NULL);
6492 }
6493 GROW;
6494 SKIP_BLANKS;
6495 GROW;
6496 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006497 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006498 /* Recurse on second child */
6499 NEXT;
6500 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006501 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6502 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006503 SKIP_BLANKS;
6504 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006505 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006506 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006507 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006508 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006509 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006510 return(NULL);
6511 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006512 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006513 if (last == NULL) {
6514 if (ret != NULL)
6515 xmlFreeDocElementContent(ctxt->myDoc, ret);
6516 return(NULL);
6517 }
Owen Taylor3473f882001-02-23 17:55:21 +00006518 if (RAW == '?') {
6519 last->ocur = XML_ELEMENT_CONTENT_OPT;
6520 NEXT;
6521 } else if (RAW == '*') {
6522 last->ocur = XML_ELEMENT_CONTENT_MULT;
6523 NEXT;
6524 } else if (RAW == '+') {
6525 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6526 NEXT;
6527 } else {
6528 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6529 }
6530 }
6531 SKIP_BLANKS;
6532 GROW;
6533 }
6534 if ((cur != NULL) && (last != NULL)) {
6535 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006536 if (last != NULL)
6537 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006538 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006539 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006540 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6541"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006542 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006543 }
Owen Taylor3473f882001-02-23 17:55:21 +00006544 NEXT;
6545 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006546 if (ret != NULL) {
6547 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6548 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6549 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6550 else
6551 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6552 }
Owen Taylor3473f882001-02-23 17:55:21 +00006553 NEXT;
6554 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006555 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006556 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006557 cur = ret;
6558 /*
6559 * Some normalization:
6560 * (a | b* | c?)* == (a | b | c)*
6561 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006562 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006563 if ((cur->c1 != NULL) &&
6564 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6565 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6566 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6567 if ((cur->c2 != NULL) &&
6568 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6569 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6570 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6571 cur = cur->c2;
6572 }
6573 }
Owen Taylor3473f882001-02-23 17:55:21 +00006574 NEXT;
6575 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006576 if (ret != NULL) {
6577 int found = 0;
6578
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006579 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6580 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6581 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006582 else
6583 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006584 /*
6585 * Some normalization:
6586 * (a | b*)+ == (a | b)*
6587 * (a | b?)+ == (a | b)*
6588 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006589 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006590 if ((cur->c1 != NULL) &&
6591 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6592 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6593 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6594 found = 1;
6595 }
6596 if ((cur->c2 != NULL) &&
6597 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6598 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6599 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6600 found = 1;
6601 }
6602 cur = cur->c2;
6603 }
6604 if (found)
6605 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6606 }
Owen Taylor3473f882001-02-23 17:55:21 +00006607 NEXT;
6608 }
6609 return(ret);
6610}
6611
6612/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006613 * xmlParseElementChildrenContentDecl:
6614 * @ctxt: an XML parser context
6615 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006616 *
6617 * parse the declaration for a Mixed Element content
6618 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6619 *
6620 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6621 *
6622 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6623 *
6624 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6625 *
6626 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6627 *
6628 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6629 * TODO Parameter-entity replacement text must be properly nested
6630 * with parenthesized groups. That is to say, if either of the
6631 * opening or closing parentheses in a choice, seq, or Mixed
6632 * construct is contained in the replacement text for a parameter
6633 * entity, both must be contained in the same replacement text. For
6634 * interoperability, if a parameter-entity reference appears in a
6635 * choice, seq, or Mixed construct, its replacement text should not
6636 * be empty, and neither the first nor last non-blank character of
6637 * the replacement text should be a connector (| or ,).
6638 *
6639 * Returns the tree of xmlElementContentPtr describing the element
6640 * hierarchy.
6641 */
6642xmlElementContentPtr
6643xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6644 /* stub left for API/ABI compat */
6645 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6646}
6647
6648/**
Owen Taylor3473f882001-02-23 17:55:21 +00006649 * xmlParseElementContentDecl:
6650 * @ctxt: an XML parser context
6651 * @name: the name of the element being defined.
6652 * @result: the Element Content pointer will be stored here if any
6653 *
6654 * parse the declaration for an Element content either Mixed or Children,
6655 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006656 *
Owen Taylor3473f882001-02-23 17:55:21 +00006657 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6658 *
6659 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6660 */
6661
6662int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006663xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006664 xmlElementContentPtr *result) {
6665
6666 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006667 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006668 int res;
6669
6670 *result = NULL;
6671
6672 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006673 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006674 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006675 return(-1);
6676 }
6677 NEXT;
6678 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006679 if (ctxt->instate == XML_PARSER_EOF)
6680 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006681 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006682 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006683 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006684 res = XML_ELEMENT_TYPE_MIXED;
6685 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006686 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006687 res = XML_ELEMENT_TYPE_ELEMENT;
6688 }
Owen Taylor3473f882001-02-23 17:55:21 +00006689 SKIP_BLANKS;
6690 *result = tree;
6691 return(res);
6692}
6693
6694/**
6695 * xmlParseElementDecl:
6696 * @ctxt: an XML parser context
6697 *
6698 * parse an Element declaration.
6699 *
6700 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6701 *
6702 * [ VC: Unique Element Type Declaration ]
6703 * No element type may be declared more than once
6704 *
6705 * Returns the type of the element, or -1 in case of error
6706 */
6707int
6708xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006709 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006710 int ret = -1;
6711 xmlElementContentPtr content = NULL;
6712
Daniel Veillard4c778d82005-01-23 17:37:44 +00006713 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006714 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006715 xmlParserInputPtr input = ctxt->input;
6716
6717 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006718 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006719 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720 "Space required after 'ELEMENT'\n");
David Kilzer00906752016-01-26 16:57:03 -08006721 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006722 }
6723 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006724 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006725 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006726 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6727 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006728 return(-1);
6729 }
6730 while ((RAW == 0) && (ctxt->inputNr > 1))
6731 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006732 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006733 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6734 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006735 }
6736 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006737 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006738 SKIP(5);
6739 /*
6740 * Element must always be empty.
6741 */
6742 ret = XML_ELEMENT_TYPE_EMPTY;
6743 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6744 (NXT(2) == 'Y')) {
6745 SKIP(3);
6746 /*
6747 * Element is a generic container.
6748 */
6749 ret = XML_ELEMENT_TYPE_ANY;
6750 } else if (RAW == '(') {
6751 ret = xmlParseElementContentDecl(ctxt, name, &content);
6752 } else {
6753 /*
6754 * [ WFC: PEs in Internal Subset ] error handling.
6755 */
6756 if ((RAW == '%') && (ctxt->external == 0) &&
6757 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006758 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006759 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006760 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006761 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006762 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6763 }
Owen Taylor3473f882001-02-23 17:55:21 +00006764 return(-1);
6765 }
6766
6767 SKIP_BLANKS;
6768 /*
6769 * Pop-up of finished entities.
6770 */
6771 while ((RAW == 0) && (ctxt->inputNr > 1))
6772 xmlPopInput(ctxt);
6773 SKIP_BLANKS;
6774
6775 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006776 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006777 if (content != NULL) {
6778 xmlFreeDocElementContent(ctxt->myDoc, content);
6779 }
Owen Taylor3473f882001-02-23 17:55:21 +00006780 } else {
6781 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006782 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6783 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006784 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006785
Owen Taylor3473f882001-02-23 17:55:21 +00006786 NEXT;
6787 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006788 (ctxt->sax->elementDecl != NULL)) {
6789 if (content != NULL)
6790 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006791 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6792 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006793 if ((content != NULL) && (content->parent == NULL)) {
6794 /*
6795 * this is a trick: if xmlAddElementDecl is called,
6796 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006797 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006798 * interfaces or change the API/ABI
6799 */
6800 xmlFreeDocElementContent(ctxt->myDoc, content);
6801 }
6802 } else if (content != NULL) {
6803 xmlFreeDocElementContent(ctxt->myDoc, content);
6804 }
Owen Taylor3473f882001-02-23 17:55:21 +00006805 }
Owen Taylor3473f882001-02-23 17:55:21 +00006806 }
6807 return(ret);
6808}
6809
6810/**
Owen Taylor3473f882001-02-23 17:55:21 +00006811 * xmlParseConditionalSections
6812 * @ctxt: an XML parser context
6813 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006814 * [61] conditionalSect ::= includeSect | ignoreSect
6815 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006816 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6817 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6818 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6819 */
6820
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006821static void
Owen Taylor3473f882001-02-23 17:55:21 +00006822xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006823 int id = ctxt->input->id;
6824
Owen Taylor3473f882001-02-23 17:55:21 +00006825 SKIP(3);
6826 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006827 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006828 SKIP(7);
6829 SKIP_BLANKS;
6830 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006831 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006832 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006833 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006834 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006835 if (ctxt->input->id != id) {
6836 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6837 "All markup of the conditional section is not in the same entity\n",
6838 NULL, NULL);
6839 }
Owen Taylor3473f882001-02-23 17:55:21 +00006840 NEXT;
6841 }
6842 if (xmlParserDebugEntities) {
6843 if ((ctxt->input != NULL) && (ctxt->input->filename))
6844 xmlGenericError(xmlGenericErrorContext,
6845 "%s(%d): ", ctxt->input->filename,
6846 ctxt->input->line);
6847 xmlGenericError(xmlGenericErrorContext,
6848 "Entering INCLUDE Conditional Section\n");
6849 }
6850
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006851 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6852 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006853 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006854 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006855
6856 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6857 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006858 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006859 NEXT;
6860 } else if (RAW == '%') {
6861 xmlParsePEReference(ctxt);
6862 } else
6863 xmlParseMarkupDecl(ctxt);
6864
6865 /*
6866 * Pop-up of finished entities.
6867 */
6868 while ((RAW == 0) && (ctxt->inputNr > 1))
6869 xmlPopInput(ctxt);
6870
Daniel Veillardfdc91562002-07-01 21:52:03 +00006871 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006872 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
David Kilzer00906752016-01-26 16:57:03 -08006873 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006874 break;
6875 }
6876 }
6877 if (xmlParserDebugEntities) {
6878 if ((ctxt->input != NULL) && (ctxt->input->filename))
6879 xmlGenericError(xmlGenericErrorContext,
6880 "%s(%d): ", ctxt->input->filename,
6881 ctxt->input->line);
6882 xmlGenericError(xmlGenericErrorContext,
6883 "Leaving INCLUDE Conditional Section\n");
6884 }
6885
Daniel Veillarda07050d2003-10-19 14:46:32 +00006886 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006887 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006888 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006889 int depth = 0;
6890
6891 SKIP(6);
6892 SKIP_BLANKS;
6893 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006894 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006895 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006896 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006897 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006898 if (ctxt->input->id != id) {
6899 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6900 "All markup of the conditional section is not in the same entity\n",
6901 NULL, NULL);
6902 }
Owen Taylor3473f882001-02-23 17:55:21 +00006903 NEXT;
6904 }
6905 if (xmlParserDebugEntities) {
6906 if ((ctxt->input != NULL) && (ctxt->input->filename))
6907 xmlGenericError(xmlGenericErrorContext,
6908 "%s(%d): ", ctxt->input->filename,
6909 ctxt->input->line);
6910 xmlGenericError(xmlGenericErrorContext,
6911 "Entering IGNORE Conditional Section\n");
6912 }
6913
6914 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006915 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006916 * But disable SAX event generating DTD building in the meantime
6917 */
6918 state = ctxt->disableSAX;
6919 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006920 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006921 ctxt->instate = XML_PARSER_IGNORE;
6922
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006923 while (((depth >= 0) && (RAW != 0)) &&
6924 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006925 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6926 depth++;
6927 SKIP(3);
6928 continue;
6929 }
6930 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6931 if (--depth >= 0) SKIP(3);
6932 continue;
6933 }
6934 NEXT;
6935 continue;
6936 }
6937
6938 ctxt->disableSAX = state;
6939 ctxt->instate = instate;
6940
6941 if (xmlParserDebugEntities) {
6942 if ((ctxt->input != NULL) && (ctxt->input->filename))
6943 xmlGenericError(xmlGenericErrorContext,
6944 "%s(%d): ", ctxt->input->filename,
6945 ctxt->input->line);
6946 xmlGenericError(xmlGenericErrorContext,
6947 "Leaving IGNORE Conditional Section\n");
6948 }
6949
6950 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006951 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006952 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006953 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006954 }
6955
6956 if (RAW == 0)
6957 SHRINK;
6958
6959 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006960 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006961 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006962 if (ctxt->input->id != id) {
6963 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6964 "All markup of the conditional section is not in the same entity\n",
6965 NULL, NULL);
6966 }
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006967 if ((ctxt-> instate != XML_PARSER_EOF) &&
Daniel Veillard41ac9042015-10-27 10:53:44 +08006968 ((ctxt->input->cur + 3) <= ctxt->input->end))
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006969 SKIP(3);
Owen Taylor3473f882001-02-23 17:55:21 +00006970 }
6971}
6972
6973/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006974 * xmlParseMarkupDecl:
6975 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006976 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006977 * parse Markup declarations
6978 *
6979 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6980 * NotationDecl | PI | Comment
6981 *
6982 * [ VC: Proper Declaration/PE Nesting ]
6983 * Parameter-entity replacement text must be properly nested with
6984 * markup declarations. That is to say, if either the first character
6985 * or the last character of a markup declaration (markupdecl above) is
6986 * contained in the replacement text for a parameter-entity reference,
6987 * both must be contained in the same replacement text.
6988 *
6989 * [ WFC: PEs in Internal Subset ]
6990 * In the internal DTD subset, parameter-entity references can occur
6991 * only where markup declarations can occur, not within markup declarations.
6992 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006993 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006994 */
6995void
6996xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6997 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006998 if (CUR == '<') {
6999 if (NXT(1) == '!') {
7000 switch (NXT(2)) {
7001 case 'E':
7002 if (NXT(3) == 'L')
7003 xmlParseElementDecl(ctxt);
7004 else if (NXT(3) == 'N')
7005 xmlParseEntityDecl(ctxt);
7006 break;
7007 case 'A':
7008 xmlParseAttributeListDecl(ctxt);
7009 break;
7010 case 'N':
7011 xmlParseNotationDecl(ctxt);
7012 break;
7013 case '-':
7014 xmlParseComment(ctxt);
7015 break;
7016 default:
7017 /* there is an error but it will be detected later */
7018 break;
7019 }
7020 } else if (NXT(1) == '?') {
7021 xmlParsePI(ctxt);
7022 }
7023 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08007024
7025 /*
7026 * detect requirement to exit there and act accordingly
7027 * and avoid having instate overriden later on
7028 */
7029 if (ctxt->instate == XML_PARSER_EOF)
7030 return;
7031
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007032 /*
7033 * This is only for internal subset. On external entities,
7034 * the replacement is done before parsing stage
7035 */
7036 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
7037 xmlParsePEReference(ctxt);
7038
7039 /*
7040 * Conditional sections are allowed from entities included
7041 * by PE References in the internal subset.
7042 */
7043 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
7044 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7045 xmlParseConditionalSections(ctxt);
7046 }
7047 }
7048
7049 ctxt->instate = XML_PARSER_DTD;
7050}
7051
7052/**
7053 * xmlParseTextDecl:
7054 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00007055 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007056 * parse an XML declaration header for external entities
7057 *
7058 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007059 */
7060
7061void
7062xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7063 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007064 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007065
7066 /*
7067 * We know that '<?xml' is here.
7068 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007069 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007070 SKIP(5);
7071 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007072 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007073 return;
7074 }
7075
William M. Brack76e95df2003-10-18 16:20:14 +00007076 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007077 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7078 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007079 }
7080 SKIP_BLANKS;
7081
7082 /*
7083 * We may have the VersionInfo here.
7084 */
7085 version = xmlParseVersionInfo(ctxt);
7086 if (version == NULL)
7087 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00007088 else {
William M. Brack76e95df2003-10-18 16:20:14 +00007089 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007090 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7091 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00007092 }
7093 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007094 ctxt->input->version = version;
7095
7096 /*
7097 * We must have the encoding declaration
7098 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007099 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007100 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7101 /*
7102 * The XML REC instructs us to stop parsing right here
7103 */
7104 return;
7105 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007106 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7107 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7108 "Missing encoding in text declaration\n");
7109 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007110
7111 SKIP_BLANKS;
7112 if ((RAW == '?') && (NXT(1) == '>')) {
7113 SKIP(2);
7114 } else if (RAW == '>') {
7115 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007116 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007117 NEXT;
7118 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007119 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007120 MOVETO_ENDTAG(CUR_PTR);
7121 NEXT;
7122 }
7123}
7124
7125/**
Owen Taylor3473f882001-02-23 17:55:21 +00007126 * xmlParseExternalSubset:
7127 * @ctxt: an XML parser context
7128 * @ExternalID: the external identifier
7129 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007130 *
Owen Taylor3473f882001-02-23 17:55:21 +00007131 * parse Markup declarations from an external subset
7132 *
7133 * [30] extSubset ::= textDecl? extSubsetDecl
7134 *
7135 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7136 */
7137void
7138xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7139 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00007140 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007141 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007142
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007143 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007144 (ctxt->input->end - ctxt->input->cur >= 4)) {
7145 xmlChar start[4];
7146 xmlCharEncoding enc;
7147
7148 start[0] = RAW;
7149 start[1] = NXT(1);
7150 start[2] = NXT(2);
7151 start[3] = NXT(3);
7152 enc = xmlDetectCharEncoding(start, 4);
7153 if (enc != XML_CHAR_ENCODING_NONE)
7154 xmlSwitchEncoding(ctxt, enc);
7155 }
7156
Daniel Veillarda07050d2003-10-19 14:46:32 +00007157 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007158 xmlParseTextDecl(ctxt);
7159 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7160 /*
7161 * The XML REC instructs us to stop parsing right here
7162 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08007163 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007164 return;
7165 }
7166 }
7167 if (ctxt->myDoc == NULL) {
7168 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007169 if (ctxt->myDoc == NULL) {
7170 xmlErrMemory(ctxt, "New Doc failed");
7171 return;
7172 }
7173 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007174 }
7175 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7176 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7177
7178 ctxt->instate = XML_PARSER_DTD;
7179 ctxt->external = 1;
7180 while (((RAW == '<') && (NXT(1) == '?')) ||
7181 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007182 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007183 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007184 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007185
7186 GROW;
7187 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7188 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007189 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007190 NEXT;
7191 } else if (RAW == '%') {
7192 xmlParsePEReference(ctxt);
7193 } else
7194 xmlParseMarkupDecl(ctxt);
7195
7196 /*
7197 * Pop-up of finished entities.
7198 */
7199 while ((RAW == 0) && (ctxt->inputNr > 1))
7200 xmlPopInput(ctxt);
7201
Daniel Veillardfdc91562002-07-01 21:52:03 +00007202 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007203 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007204 break;
7205 }
7206 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007207
Owen Taylor3473f882001-02-23 17:55:21 +00007208 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007209 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007210 }
7211
7212}
7213
7214/**
7215 * xmlParseReference:
7216 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007217 *
Owen Taylor3473f882001-02-23 17:55:21 +00007218 * parse and handle entity references in content, depending on the SAX
7219 * interface, this may end-up in a call to character() if this is a
7220 * CharRef, a predefined entity, if there is no reference() callback.
7221 * or if the parser was asked to switch to that mode.
7222 *
7223 * [67] Reference ::= EntityRef | CharRef
7224 */
7225void
7226xmlParseReference(xmlParserCtxtPtr ctxt) {
7227 xmlEntityPtr ent;
7228 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007229 int was_checked;
7230 xmlNodePtr list = NULL;
7231 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007232
Daniel Veillard0161e632008-08-28 15:36:32 +00007233
7234 if (RAW != '&')
7235 return;
7236
7237 /*
7238 * Simple case of a CharRef
7239 */
Owen Taylor3473f882001-02-23 17:55:21 +00007240 if (NXT(1) == '#') {
7241 int i = 0;
7242 xmlChar out[10];
7243 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007244 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007245
Daniel Veillarddc171602008-03-26 17:41:38 +00007246 if (value == 0)
7247 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007248 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7249 /*
7250 * So we are using non-UTF-8 buffers
7251 * Check that the char fit on 8bits, if not
7252 * generate a CharRef.
7253 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007254 if (value <= 0xFF) {
7255 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007256 out[1] = 0;
7257 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7258 (!ctxt->disableSAX))
7259 ctxt->sax->characters(ctxt->userData, out, 1);
7260 } else {
7261 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007262 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007263 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007264 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007265 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7266 (!ctxt->disableSAX))
7267 ctxt->sax->reference(ctxt->userData, out);
7268 }
7269 } else {
7270 /*
7271 * Just encode the value in UTF-8
7272 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007273 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007274 out[i] = 0;
7275 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7276 (!ctxt->disableSAX))
7277 ctxt->sax->characters(ctxt->userData, out, i);
7278 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007279 return;
7280 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007281
Daniel Veillard0161e632008-08-28 15:36:32 +00007282 /*
7283 * We are seeing an entity reference
7284 */
7285 ent = xmlParseEntityRef(ctxt);
7286 if (ent == NULL) return;
7287 if (!ctxt->wellFormed)
7288 return;
7289 was_checked = ent->checked;
7290
7291 /* special case of predefined entities */
7292 if ((ent->name == NULL) ||
7293 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7294 val = ent->content;
7295 if (val == NULL) return;
7296 /*
7297 * inline the entity.
7298 */
7299 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7300 (!ctxt->disableSAX))
7301 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7302 return;
7303 }
7304
7305 /*
7306 * The first reference to the entity trigger a parsing phase
7307 * where the ent->children is filled with the result from
7308 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007309 * Note: external parsed entities will not be loaded, it is not
7310 * required for a non-validating parser, unless the parsing option
7311 * of validating, or substituting entities were given. Doing so is
7312 * far more secure as the parser will only process data coming from
7313 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007314 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007315 if (((ent->checked == 0) ||
7316 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007317 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7318 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007319 unsigned long oldnbent = ctxt->nbentities;
7320
7321 /*
7322 * This is a bit hackish but this seems the best
7323 * way to make sure both SAX and DOM entity support
7324 * behaves okay.
7325 */
7326 void *user_data;
7327 if (ctxt->userData == ctxt)
7328 user_data = NULL;
7329 else
7330 user_data = ctxt->userData;
7331
7332 /*
7333 * Check that this entity is well formed
7334 * 4.3.2: An internal general parsed entity is well-formed
7335 * if its replacement text matches the production labeled
7336 * content.
7337 */
7338 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7339 ctxt->depth++;
7340 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7341 user_data, &list);
7342 ctxt->depth--;
7343
7344 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7345 ctxt->depth++;
7346 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7347 user_data, ctxt->depth, ent->URI,
7348 ent->ExternalID, &list);
7349 ctxt->depth--;
7350 } else {
7351 ret = XML_ERR_ENTITY_PE_INTERNAL;
7352 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7353 "invalid entity type found\n", NULL);
7354 }
7355
7356 /*
7357 * Store the number of entities needing parsing for this entity
7358 * content and do checkings
7359 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007360 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7361 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7362 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007363 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007364 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007365 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007366 return;
7367 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007368 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007369 xmlFreeNodeList(list);
7370 return;
7371 }
Owen Taylor3473f882001-02-23 17:55:21 +00007372
Daniel Veillard0161e632008-08-28 15:36:32 +00007373 if ((ret == XML_ERR_OK) && (list != NULL)) {
7374 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7375 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7376 (ent->children == NULL)) {
7377 ent->children = list;
7378 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007379 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007380 * Prune it directly in the generated document
7381 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007382 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007383 if (((list->type == XML_TEXT_NODE) &&
7384 (list->next == NULL)) ||
7385 (ctxt->parseMode == XML_PARSE_READER)) {
7386 list->parent = (xmlNodePtr) ent;
7387 list = NULL;
7388 ent->owner = 1;
7389 } else {
7390 ent->owner = 0;
7391 while (list != NULL) {
7392 list->parent = (xmlNodePtr) ctxt->node;
7393 list->doc = ctxt->myDoc;
7394 if (list->next == NULL)
7395 ent->last = list;
7396 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007397 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007398 list = ent->children;
7399#ifdef LIBXML_LEGACY_ENABLED
7400 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7401 xmlAddEntityReference(ent, list, NULL);
7402#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007403 }
7404 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007405 ent->owner = 1;
7406 while (list != NULL) {
7407 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007408 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007409 if (list->next == NULL)
7410 ent->last = list;
7411 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007412 }
7413 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007414 } else {
7415 xmlFreeNodeList(list);
7416 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007417 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007418 } else if ((ret != XML_ERR_OK) &&
7419 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7420 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7421 "Entity '%s' failed to parse\n", ent->name);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007422 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007423 } else if (list != NULL) {
7424 xmlFreeNodeList(list);
7425 list = NULL;
7426 }
7427 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007428 ent->checked = 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007429 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007430 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007431 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007432
Daniel Veillard0161e632008-08-28 15:36:32 +00007433 /*
7434 * Now that the entity content has been gathered
7435 * provide it to the application, this can take different forms based
7436 * on the parsing modes.
7437 */
7438 if (ent->children == NULL) {
7439 /*
7440 * Probably running in SAX mode and the callbacks don't
7441 * build the entity content. So unless we already went
7442 * though parsing for first checking go though the entity
7443 * content to generate callbacks associated to the entity
7444 */
7445 if (was_checked != 0) {
7446 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007447 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007448 * This is a bit hackish but this seems the best
7449 * way to make sure both SAX and DOM entity support
7450 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007451 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007452 if (ctxt->userData == ctxt)
7453 user_data = NULL;
7454 else
7455 user_data = ctxt->userData;
7456
7457 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7458 ctxt->depth++;
7459 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7460 ent->content, user_data, NULL);
7461 ctxt->depth--;
7462 } else if (ent->etype ==
7463 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7464 ctxt->depth++;
7465 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7466 ctxt->sax, user_data, ctxt->depth,
7467 ent->URI, ent->ExternalID, NULL);
7468 ctxt->depth--;
7469 } else {
7470 ret = XML_ERR_ENTITY_PE_INTERNAL;
7471 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7472 "invalid entity type found\n", NULL);
7473 }
7474 if (ret == XML_ERR_ENTITY_LOOP) {
7475 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7476 return;
7477 }
7478 }
7479 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7480 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7481 /*
7482 * Entity reference callback comes second, it's somewhat
7483 * superfluous but a compatibility to historical behaviour
7484 */
7485 ctxt->sax->reference(ctxt->userData, ent->name);
7486 }
7487 return;
7488 }
7489
7490 /*
7491 * If we didn't get any children for the entity being built
7492 */
7493 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7494 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7495 /*
7496 * Create a node.
7497 */
7498 ctxt->sax->reference(ctxt->userData, ent->name);
7499 return;
7500 }
7501
7502 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7503 /*
7504 * There is a problem on the handling of _private for entities
7505 * (bug 155816): Should we copy the content of the field from
7506 * the entity (possibly overwriting some value set by the user
7507 * when a copy is created), should we leave it alone, or should
7508 * we try to take care of different situations? The problem
7509 * is exacerbated by the usage of this field by the xmlReader.
7510 * To fix this bug, we look at _private on the created node
7511 * and, if it's NULL, we copy in whatever was in the entity.
7512 * If it's not NULL we leave it alone. This is somewhat of a
7513 * hack - maybe we should have further tests to determine
7514 * what to do.
7515 */
7516 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7517 /*
7518 * Seems we are generating the DOM content, do
7519 * a simple tree copy for all references except the first
7520 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007521 */
7522 if (((list == NULL) && (ent->owner == 0)) ||
7523 (ctxt->parseMode == XML_PARSE_READER)) {
7524 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7525
7526 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007527 * We are copying here, make sure there is no abuse
7528 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007529 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007530 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7531 return;
7532
7533 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007534 * when operating on a reader, the entities definitions
7535 * are always owning the entities subtree.
7536 if (ctxt->parseMode == XML_PARSE_READER)
7537 ent->owner = 1;
7538 */
7539
7540 cur = ent->children;
7541 while (cur != NULL) {
7542 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7543 if (nw != NULL) {
7544 if (nw->_private == NULL)
7545 nw->_private = cur->_private;
7546 if (firstChild == NULL){
7547 firstChild = nw;
7548 }
7549 nw = xmlAddChild(ctxt->node, nw);
7550 }
7551 if (cur == ent->last) {
7552 /*
7553 * needed to detect some strange empty
7554 * node cases in the reader tests
7555 */
7556 if ((ctxt->parseMode == XML_PARSE_READER) &&
7557 (nw != NULL) &&
7558 (nw->type == XML_ELEMENT_NODE) &&
7559 (nw->children == NULL))
7560 nw->extra = 1;
7561
7562 break;
7563 }
7564 cur = cur->next;
7565 }
7566#ifdef LIBXML_LEGACY_ENABLED
7567 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7568 xmlAddEntityReference(ent, firstChild, nw);
7569#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007570 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007571 xmlNodePtr nw = NULL, cur, next, last,
7572 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007573
7574 /*
7575 * We are copying here, make sure there is no abuse
7576 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007577 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007578 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7579 return;
7580
Daniel Veillard0161e632008-08-28 15:36:32 +00007581 /*
7582 * Copy the entity child list and make it the new
7583 * entity child list. The goal is to make sure any
7584 * ID or REF referenced will be the one from the
7585 * document content and not the entity copy.
7586 */
7587 cur = ent->children;
7588 ent->children = NULL;
7589 last = ent->last;
7590 ent->last = NULL;
7591 while (cur != NULL) {
7592 next = cur->next;
7593 cur->next = NULL;
7594 cur->parent = NULL;
7595 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7596 if (nw != NULL) {
7597 if (nw->_private == NULL)
7598 nw->_private = cur->_private;
7599 if (firstChild == NULL){
7600 firstChild = cur;
7601 }
7602 xmlAddChild((xmlNodePtr) ent, nw);
7603 xmlAddChild(ctxt->node, cur);
7604 }
7605 if (cur == last)
7606 break;
7607 cur = next;
7608 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007609 if (ent->owner == 0)
7610 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007611#ifdef LIBXML_LEGACY_ENABLED
7612 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7613 xmlAddEntityReference(ent, firstChild, nw);
7614#endif /* LIBXML_LEGACY_ENABLED */
7615 } else {
7616 const xmlChar *nbktext;
7617
7618 /*
7619 * the name change is to avoid coalescing of the
7620 * node with a possible previous text one which
7621 * would make ent->children a dangling pointer
7622 */
7623 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7624 -1);
7625 if (ent->children->type == XML_TEXT_NODE)
7626 ent->children->name = nbktext;
7627 if ((ent->last != ent->children) &&
7628 (ent->last->type == XML_TEXT_NODE))
7629 ent->last->name = nbktext;
7630 xmlAddChildList(ctxt->node, ent->children);
7631 }
7632
7633 /*
7634 * This is to avoid a nasty side effect, see
7635 * characters() in SAX.c
7636 */
7637 ctxt->nodemem = 0;
7638 ctxt->nodelen = 0;
7639 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007640 }
7641 }
7642}
7643
7644/**
7645 * xmlParseEntityRef:
7646 * @ctxt: an XML parser context
7647 *
7648 * parse ENTITY references declarations
7649 *
7650 * [68] EntityRef ::= '&' Name ';'
7651 *
7652 * [ WFC: Entity Declared ]
7653 * In a document without any DTD, a document with only an internal DTD
7654 * subset which contains no parameter entity references, or a document
7655 * with "standalone='yes'", the Name given in the entity reference
7656 * must match that in an entity declaration, except that well-formed
7657 * documents need not declare any of the following entities: amp, lt,
7658 * gt, apos, quot. The declaration of a parameter entity must precede
7659 * any reference to it. Similarly, the declaration of a general entity
7660 * must precede any reference to it which appears in a default value in an
7661 * attribute-list declaration. Note that if entities are declared in the
7662 * external subset or in external parameter entities, a non-validating
7663 * processor is not obligated to read and process their declarations;
7664 * for such documents, the rule that an entity must be declared is a
7665 * well-formedness constraint only if standalone='yes'.
7666 *
7667 * [ WFC: Parsed Entity ]
7668 * An entity reference must not contain the name of an unparsed entity
7669 *
7670 * Returns the xmlEntityPtr if found, or NULL otherwise.
7671 */
7672xmlEntityPtr
7673xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007674 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007675 xmlEntityPtr ent = NULL;
7676
7677 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007678 if (ctxt->instate == XML_PARSER_EOF)
7679 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007680
Daniel Veillard0161e632008-08-28 15:36:32 +00007681 if (RAW != '&')
7682 return(NULL);
7683 NEXT;
7684 name = xmlParseName(ctxt);
7685 if (name == NULL) {
7686 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7687 "xmlParseEntityRef: no name\n");
7688 return(NULL);
7689 }
7690 if (RAW != ';') {
7691 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7692 return(NULL);
7693 }
7694 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007695
Daniel Veillard0161e632008-08-28 15:36:32 +00007696 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007697 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007698 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007699 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7700 ent = xmlGetPredefinedEntity(name);
7701 if (ent != NULL)
7702 return(ent);
7703 }
Owen Taylor3473f882001-02-23 17:55:21 +00007704
Daniel Veillard0161e632008-08-28 15:36:32 +00007705 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007706 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007707 */
7708 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007709
Daniel Veillard0161e632008-08-28 15:36:32 +00007710 /*
7711 * Ask first SAX for entity resolution, otherwise try the
7712 * entities which may have stored in the parser context.
7713 */
7714 if (ctxt->sax != NULL) {
7715 if (ctxt->sax->getEntity != NULL)
7716 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007717 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007718 (ctxt->options & XML_PARSE_OLDSAX))
7719 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007720 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7721 (ctxt->userData==ctxt)) {
7722 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007723 }
7724 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007725 if (ctxt->instate == XML_PARSER_EOF)
7726 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007727 /*
7728 * [ WFC: Entity Declared ]
7729 * In a document without any DTD, a document with only an
7730 * internal DTD subset which contains no parameter entity
7731 * references, or a document with "standalone='yes'", the
7732 * Name given in the entity reference must match that in an
7733 * entity declaration, except that well-formed documents
7734 * need not declare any of the following entities: amp, lt,
7735 * gt, apos, quot.
7736 * The declaration of a parameter entity must precede any
7737 * reference to it.
7738 * Similarly, the declaration of a general entity must
7739 * precede any reference to it which appears in a default
7740 * value in an attribute-list declaration. Note that if
7741 * entities are declared in the external subset or in
7742 * external parameter entities, a non-validating processor
7743 * is not obligated to read and process their declarations;
7744 * for such documents, the rule that an entity must be
7745 * declared is a well-formedness constraint only if
7746 * standalone='yes'.
7747 */
7748 if (ent == NULL) {
7749 if ((ctxt->standalone == 1) ||
7750 ((ctxt->hasExternalSubset == 0) &&
7751 (ctxt->hasPErefs == 0))) {
7752 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7753 "Entity '%s' not defined\n", name);
7754 } else {
7755 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7756 "Entity '%s' not defined\n", name);
7757 if ((ctxt->inSubset == 0) &&
7758 (ctxt->sax != NULL) &&
7759 (ctxt->sax->reference != NULL)) {
7760 ctxt->sax->reference(ctxt->userData, name);
7761 }
7762 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007763 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007764 ctxt->valid = 0;
7765 }
7766
7767 /*
7768 * [ WFC: Parsed Entity ]
7769 * An entity reference must not contain the name of an
7770 * unparsed entity
7771 */
7772 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7773 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7774 "Entity reference to unparsed entity %s\n", name);
7775 }
7776
7777 /*
7778 * [ WFC: No External Entity References ]
7779 * Attribute values cannot contain direct or indirect
7780 * entity references to external entities.
7781 */
7782 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7783 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7784 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7785 "Attribute references external entity '%s'\n", name);
7786 }
7787 /*
7788 * [ WFC: No < in Attribute Values ]
7789 * The replacement text of any entity referred to directly or
7790 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007791 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007792 */
7793 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007794 (ent != NULL) &&
7795 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007796 if (((ent->checked & 1) || (ent->checked == 0)) &&
7797 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007798 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7799 "'<' in entity '%s' is not allowed in attributes values\n", name);
7800 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007801 }
7802
7803 /*
7804 * Internal check, no parameter entities here ...
7805 */
7806 else {
7807 switch (ent->etype) {
7808 case XML_INTERNAL_PARAMETER_ENTITY:
7809 case XML_EXTERNAL_PARAMETER_ENTITY:
7810 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7811 "Attempt to reference the parameter entity '%s'\n",
7812 name);
7813 break;
7814 default:
7815 break;
7816 }
7817 }
7818
7819 /*
7820 * [ WFC: No Recursion ]
7821 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007822 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007823 * Done somewhere else
7824 */
Owen Taylor3473f882001-02-23 17:55:21 +00007825 return(ent);
7826}
7827
7828/**
7829 * xmlParseStringEntityRef:
7830 * @ctxt: an XML parser context
7831 * @str: a pointer to an index in the string
7832 *
7833 * parse ENTITY references declarations, but this version parses it from
7834 * a string value.
7835 *
7836 * [68] EntityRef ::= '&' Name ';'
7837 *
7838 * [ WFC: Entity Declared ]
7839 * In a document without any DTD, a document with only an internal DTD
7840 * subset which contains no parameter entity references, or a document
7841 * with "standalone='yes'", the Name given in the entity reference
7842 * must match that in an entity declaration, except that well-formed
7843 * documents need not declare any of the following entities: amp, lt,
7844 * gt, apos, quot. The declaration of a parameter entity must precede
7845 * any reference to it. Similarly, the declaration of a general entity
7846 * must precede any reference to it which appears in a default value in an
7847 * attribute-list declaration. Note that if entities are declared in the
7848 * external subset or in external parameter entities, a non-validating
7849 * processor is not obligated to read and process their declarations;
7850 * for such documents, the rule that an entity must be declared is a
7851 * well-formedness constraint only if standalone='yes'.
7852 *
7853 * [ WFC: Parsed Entity ]
7854 * An entity reference must not contain the name of an unparsed entity
7855 *
7856 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7857 * is updated to the current location in the string.
7858 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007859static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007860xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7861 xmlChar *name;
7862 const xmlChar *ptr;
7863 xmlChar cur;
7864 xmlEntityPtr ent = NULL;
7865
7866 if ((str == NULL) || (*str == NULL))
7867 return(NULL);
7868 ptr = *str;
7869 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007870 if (cur != '&')
7871 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007872
Daniel Veillard0161e632008-08-28 15:36:32 +00007873 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007874 name = xmlParseStringName(ctxt, &ptr);
7875 if (name == NULL) {
7876 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7877 "xmlParseStringEntityRef: no name\n");
7878 *str = ptr;
7879 return(NULL);
7880 }
7881 if (*ptr != ';') {
7882 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007883 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007884 *str = ptr;
7885 return(NULL);
7886 }
7887 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007888
Owen Taylor3473f882001-02-23 17:55:21 +00007889
Daniel Veillard0161e632008-08-28 15:36:32 +00007890 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007891 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007892 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007893 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7894 ent = xmlGetPredefinedEntity(name);
7895 if (ent != NULL) {
7896 xmlFree(name);
7897 *str = ptr;
7898 return(ent);
7899 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007900 }
Owen Taylor3473f882001-02-23 17:55:21 +00007901
Daniel Veillard0161e632008-08-28 15:36:32 +00007902 /*
7903 * Increate the number of entity references parsed
7904 */
7905 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007906
Daniel Veillard0161e632008-08-28 15:36:32 +00007907 /*
7908 * Ask first SAX for entity resolution, otherwise try the
7909 * entities which may have stored in the parser context.
7910 */
7911 if (ctxt->sax != NULL) {
7912 if (ctxt->sax->getEntity != NULL)
7913 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007914 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7915 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007916 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7917 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007918 }
7919 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007920 if (ctxt->instate == XML_PARSER_EOF) {
7921 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007922 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007923 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007924
7925 /*
7926 * [ WFC: Entity Declared ]
7927 * In a document without any DTD, a document with only an
7928 * internal DTD subset which contains no parameter entity
7929 * references, or a document with "standalone='yes'", the
7930 * Name given in the entity reference must match that in an
7931 * entity declaration, except that well-formed documents
7932 * need not declare any of the following entities: amp, lt,
7933 * gt, apos, quot.
7934 * The declaration of a parameter entity must precede any
7935 * reference to it.
7936 * Similarly, the declaration of a general entity must
7937 * precede any reference to it which appears in a default
7938 * value in an attribute-list declaration. Note that if
7939 * entities are declared in the external subset or in
7940 * external parameter entities, a non-validating processor
7941 * is not obligated to read and process their declarations;
7942 * for such documents, the rule that an entity must be
7943 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007944 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007945 */
7946 if (ent == NULL) {
7947 if ((ctxt->standalone == 1) ||
7948 ((ctxt->hasExternalSubset == 0) &&
7949 (ctxt->hasPErefs == 0))) {
7950 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7951 "Entity '%s' not defined\n", name);
7952 } else {
7953 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7954 "Entity '%s' not defined\n",
7955 name);
7956 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007957 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007958 /* TODO ? check regressions ctxt->valid = 0; */
7959 }
7960
7961 /*
7962 * [ WFC: Parsed Entity ]
7963 * An entity reference must not contain the name of an
7964 * unparsed entity
7965 */
7966 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7967 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7968 "Entity reference to unparsed entity %s\n", name);
7969 }
7970
7971 /*
7972 * [ WFC: No External Entity References ]
7973 * Attribute values cannot contain direct or indirect
7974 * entity references to external entities.
7975 */
7976 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7977 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7978 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7979 "Attribute references external entity '%s'\n", name);
7980 }
7981 /*
7982 * [ WFC: No < in Attribute Values ]
7983 * The replacement text of any entity referred to directly or
7984 * indirectly in an attribute value (other than "&lt;") must
7985 * not contain a <.
7986 */
7987 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7988 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007989 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007990 (xmlStrchr(ent->content, '<'))) {
7991 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7992 "'<' in entity '%s' is not allowed in attributes values\n",
7993 name);
7994 }
7995
7996 /*
7997 * Internal check, no parameter entities here ...
7998 */
7999 else {
8000 switch (ent->etype) {
8001 case XML_INTERNAL_PARAMETER_ENTITY:
8002 case XML_EXTERNAL_PARAMETER_ENTITY:
8003 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
8004 "Attempt to reference the parameter entity '%s'\n",
8005 name);
8006 break;
8007 default:
8008 break;
8009 }
8010 }
8011
8012 /*
8013 * [ WFC: No Recursion ]
8014 * A parsed entity must not contain a recursive reference
8015 * to itself, either directly or indirectly.
8016 * Done somewhere else
8017 */
8018
8019 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008020 *str = ptr;
8021 return(ent);
8022}
8023
8024/**
8025 * xmlParsePEReference:
8026 * @ctxt: an XML parser context
8027 *
8028 * parse PEReference declarations
8029 * The entity content is handled directly by pushing it's content as
8030 * a new input stream.
8031 *
8032 * [69] PEReference ::= '%' Name ';'
8033 *
8034 * [ WFC: No Recursion ]
8035 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008036 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008037 *
8038 * [ WFC: Entity Declared ]
8039 * In a document without any DTD, a document with only an internal DTD
8040 * subset which contains no parameter entity references, or a document
8041 * with "standalone='yes'", ... ... The declaration of a parameter
8042 * entity must precede any reference to it...
8043 *
8044 * [ VC: Entity Declared ]
8045 * In a document with an external subset or external parameter entities
8046 * with "standalone='no'", ... ... The declaration of a parameter entity
8047 * must precede any reference to it...
8048 *
8049 * [ WFC: In DTD ]
8050 * Parameter-entity references may only appear in the DTD.
8051 * NOTE: misleading but this is handled.
8052 */
8053void
Daniel Veillard8f597c32003-10-06 08:19:27 +00008054xmlParsePEReference(xmlParserCtxtPtr ctxt)
8055{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008056 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008057 xmlEntityPtr entity = NULL;
8058 xmlParserInputPtr input;
8059
Daniel Veillard0161e632008-08-28 15:36:32 +00008060 if (RAW != '%')
8061 return;
8062 NEXT;
8063 name = xmlParseName(ctxt);
8064 if (name == NULL) {
8065 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8066 "xmlParsePEReference: no name\n");
8067 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008068 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008069 if (RAW != ';') {
8070 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8071 return;
8072 }
8073
8074 NEXT;
8075
8076 /*
8077 * Increate the number of entity references parsed
8078 */
8079 ctxt->nbentities++;
8080
8081 /*
8082 * Request the entity from SAX
8083 */
8084 if ((ctxt->sax != NULL) &&
8085 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008086 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8087 if (ctxt->instate == XML_PARSER_EOF)
8088 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00008089 if (entity == NULL) {
8090 /*
8091 * [ WFC: Entity Declared ]
8092 * In a document without any DTD, a document with only an
8093 * internal DTD subset which contains no parameter entity
8094 * references, or a document with "standalone='yes'", ...
8095 * ... The declaration of a parameter entity must precede
8096 * any reference to it...
8097 */
8098 if ((ctxt->standalone == 1) ||
8099 ((ctxt->hasExternalSubset == 0) &&
8100 (ctxt->hasPErefs == 0))) {
8101 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8102 "PEReference: %%%s; not found\n",
8103 name);
8104 } else {
8105 /*
8106 * [ VC: Entity Declared ]
8107 * In a document with an external subset or external
8108 * parameter entities with "standalone='no'", ...
8109 * ... The declaration of a parameter entity must
8110 * precede any reference to it...
8111 */
8112 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8113 "PEReference: %%%s; not found\n",
8114 name, NULL);
8115 ctxt->valid = 0;
8116 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008117 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008118 } else {
8119 /*
8120 * Internal checking in case the entity quest barfed
8121 */
8122 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8123 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8124 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8125 "Internal: %%%s; is not a parameter entity\n",
8126 name, NULL);
8127 } else if (ctxt->input->free != deallocblankswrapper) {
8128 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8129 if (xmlPushInput(ctxt, input) < 0)
8130 return;
8131 } else {
Neel Mehta90ccb582017-04-07 17:43:02 +02008132 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8133 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8134 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8135 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8136 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8137 (ctxt->replaceEntities == 0) &&
8138 (ctxt->validate == 0))
8139 return;
8140
Daniel Veillard0161e632008-08-28 15:36:32 +00008141 /*
8142 * TODO !!!
8143 * handle the extra spaces added before and after
8144 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8145 */
8146 input = xmlNewEntityInputStream(ctxt, entity);
8147 if (xmlPushInput(ctxt, input) < 0)
8148 return;
8149 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8150 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8151 (IS_BLANK_CH(NXT(5)))) {
8152 xmlParseTextDecl(ctxt);
8153 if (ctxt->errNo ==
8154 XML_ERR_UNSUPPORTED_ENCODING) {
8155 /*
8156 * The XML REC instructs us to stop parsing
8157 * right here
8158 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08008159 xmlHaltParser(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00008160 return;
8161 }
8162 }
8163 }
8164 }
8165 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008166}
8167
8168/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008169 * xmlLoadEntityContent:
8170 * @ctxt: an XML parser context
8171 * @entity: an unloaded system entity
8172 *
8173 * Load the original content of the given system entity from the
8174 * ExternalID/SystemID given. This is to be used for Included in Literal
8175 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8176 *
8177 * Returns 0 in case of success and -1 in case of failure
8178 */
8179static int
8180xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8181 xmlParserInputPtr input;
8182 xmlBufferPtr buf;
8183 int l, c;
8184 int count = 0;
8185
8186 if ((ctxt == NULL) || (entity == NULL) ||
8187 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8188 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8189 (entity->content != NULL)) {
8190 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8191 "xmlLoadEntityContent parameter error");
8192 return(-1);
8193 }
8194
8195 if (xmlParserDebugEntities)
8196 xmlGenericError(xmlGenericErrorContext,
8197 "Reading %s entity content input\n", entity->name);
8198
8199 buf = xmlBufferCreate();
8200 if (buf == NULL) {
8201 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8202 "xmlLoadEntityContent parameter error");
8203 return(-1);
8204 }
8205
8206 input = xmlNewEntityInputStream(ctxt, entity);
8207 if (input == NULL) {
8208 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8209 "xmlLoadEntityContent input error");
8210 xmlBufferFree(buf);
8211 return(-1);
8212 }
8213
8214 /*
8215 * Push the entity as the current input, read char by char
8216 * saving to the buffer until the end of the entity or an error
8217 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008218 if (xmlPushInput(ctxt, input) < 0) {
8219 xmlBufferFree(buf);
8220 return(-1);
8221 }
8222
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008223 GROW;
8224 c = CUR_CHAR(l);
8225 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8226 (IS_CHAR(c))) {
8227 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008228 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008229 count = 0;
8230 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008231 if (ctxt->instate == XML_PARSER_EOF) {
8232 xmlBufferFree(buf);
8233 return(-1);
8234 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008235 }
8236 NEXTL(l);
8237 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008238 if (c == 0) {
8239 count = 0;
8240 GROW;
8241 if (ctxt->instate == XML_PARSER_EOF) {
8242 xmlBufferFree(buf);
8243 return(-1);
8244 }
8245 c = CUR_CHAR(l);
8246 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008247 }
8248
8249 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8250 xmlPopInput(ctxt);
8251 } else if (!IS_CHAR(c)) {
8252 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8253 "xmlLoadEntityContent: invalid char value %d\n",
8254 c);
8255 xmlBufferFree(buf);
8256 return(-1);
8257 }
8258 entity->content = buf->content;
8259 buf->content = NULL;
8260 xmlBufferFree(buf);
8261
8262 return(0);
8263}
8264
8265/**
Owen Taylor3473f882001-02-23 17:55:21 +00008266 * xmlParseStringPEReference:
8267 * @ctxt: an XML parser context
8268 * @str: a pointer to an index in the string
8269 *
8270 * parse PEReference declarations
8271 *
8272 * [69] PEReference ::= '%' Name ';'
8273 *
8274 * [ WFC: No Recursion ]
8275 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008276 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008277 *
8278 * [ WFC: Entity Declared ]
8279 * In a document without any DTD, a document with only an internal DTD
8280 * subset which contains no parameter entity references, or a document
8281 * with "standalone='yes'", ... ... The declaration of a parameter
8282 * entity must precede any reference to it...
8283 *
8284 * [ VC: Entity Declared ]
8285 * In a document with an external subset or external parameter entities
8286 * with "standalone='no'", ... ... The declaration of a parameter entity
8287 * must precede any reference to it...
8288 *
8289 * [ WFC: In DTD ]
8290 * Parameter-entity references may only appear in the DTD.
8291 * NOTE: misleading but this is handled.
8292 *
8293 * Returns the string of the entity content.
8294 * str is updated to the current value of the index
8295 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008296static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008297xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8298 const xmlChar *ptr;
8299 xmlChar cur;
8300 xmlChar *name;
8301 xmlEntityPtr entity = NULL;
8302
8303 if ((str == NULL) || (*str == NULL)) return(NULL);
8304 ptr = *str;
8305 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008306 if (cur != '%')
8307 return(NULL);
8308 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008309 name = xmlParseStringName(ctxt, &ptr);
8310 if (name == NULL) {
8311 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8312 "xmlParseStringPEReference: no name\n");
8313 *str = ptr;
8314 return(NULL);
8315 }
8316 cur = *ptr;
8317 if (cur != ';') {
8318 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8319 xmlFree(name);
8320 *str = ptr;
8321 return(NULL);
8322 }
8323 ptr++;
8324
8325 /*
8326 * Increate the number of entity references parsed
8327 */
8328 ctxt->nbentities++;
8329
8330 /*
8331 * Request the entity from SAX
8332 */
8333 if ((ctxt->sax != NULL) &&
8334 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008335 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8336 if (ctxt->instate == XML_PARSER_EOF) {
8337 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008338 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008339 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008340 if (entity == NULL) {
8341 /*
8342 * [ WFC: Entity Declared ]
8343 * In a document without any DTD, a document with only an
8344 * internal DTD subset which contains no parameter entity
8345 * references, or a document with "standalone='yes'", ...
8346 * ... The declaration of a parameter entity must precede
8347 * any reference to it...
8348 */
8349 if ((ctxt->standalone == 1) ||
8350 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8351 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8352 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008353 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008354 /*
8355 * [ VC: Entity Declared ]
8356 * In a document with an external subset or external
8357 * parameter entities with "standalone='no'", ...
8358 * ... The declaration of a parameter entity must
8359 * precede any reference to it...
8360 */
8361 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8362 "PEReference: %%%s; not found\n",
8363 name, NULL);
8364 ctxt->valid = 0;
8365 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008366 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008367 } else {
8368 /*
8369 * Internal checking in case the entity quest barfed
8370 */
8371 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8372 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8373 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8374 "%%%s; is not a parameter entity\n",
8375 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008376 }
8377 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008378 ctxt->hasPErefs = 1;
8379 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008380 *str = ptr;
8381 return(entity);
8382}
8383
8384/**
8385 * xmlParseDocTypeDecl:
8386 * @ctxt: an XML parser context
8387 *
8388 * parse a DOCTYPE declaration
8389 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008390 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008391 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8392 *
8393 * [ VC: Root Element Type ]
8394 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008395 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008396 */
8397
8398void
8399xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008400 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008401 xmlChar *ExternalID = NULL;
8402 xmlChar *URI = NULL;
8403
8404 /*
8405 * We know that '<!DOCTYPE' has been detected.
8406 */
8407 SKIP(9);
8408
8409 SKIP_BLANKS;
8410
8411 /*
8412 * Parse the DOCTYPE name.
8413 */
8414 name = xmlParseName(ctxt);
8415 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008416 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8417 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008418 }
8419 ctxt->intSubName = name;
8420
8421 SKIP_BLANKS;
8422
8423 /*
8424 * Check for SystemID and ExternalID
8425 */
8426 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8427
8428 if ((URI != NULL) || (ExternalID != NULL)) {
8429 ctxt->hasExternalSubset = 1;
8430 }
8431 ctxt->extSubURI = URI;
8432 ctxt->extSubSystem = ExternalID;
8433
8434 SKIP_BLANKS;
8435
8436 /*
8437 * Create and update the internal subset.
8438 */
8439 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8440 (!ctxt->disableSAX))
8441 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008442 if (ctxt->instate == XML_PARSER_EOF)
8443 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008444
8445 /*
8446 * Is there any internal subset declarations ?
8447 * they are handled separately in xmlParseInternalSubset()
8448 */
8449 if (RAW == '[')
8450 return;
8451
8452 /*
8453 * We should be at the end of the DOCTYPE declaration.
8454 */
8455 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008456 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008457 }
8458 NEXT;
8459}
8460
8461/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008462 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008463 * @ctxt: an XML parser context
8464 *
8465 * parse the internal subset declaration
8466 *
8467 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8468 */
8469
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008470static void
Owen Taylor3473f882001-02-23 17:55:21 +00008471xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8472 /*
8473 * Is there any DTD definition ?
8474 */
8475 if (RAW == '[') {
8476 ctxt->instate = XML_PARSER_DTD;
8477 NEXT;
8478 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008479 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008480 * PEReferences.
8481 * Subsequence (markupdecl | PEReference | S)*
8482 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008483 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008484 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008485 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008486
8487 SKIP_BLANKS;
8488 xmlParseMarkupDecl(ctxt);
8489 xmlParsePEReference(ctxt);
8490
8491 /*
8492 * Pop-up of finished entities.
8493 */
8494 while ((RAW == 0) && (ctxt->inputNr > 1))
8495 xmlPopInput(ctxt);
8496
8497 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008498 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008499 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008500 break;
8501 }
8502 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008503 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008504 NEXT;
8505 SKIP_BLANKS;
8506 }
8507 }
8508
8509 /*
8510 * We should be at the end of the DOCTYPE declaration.
8511 */
8512 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008513 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Daniel Veillarda7a94612016-02-09 12:55:29 +01008514 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008515 }
8516 NEXT;
8517}
8518
Daniel Veillard81273902003-09-30 00:43:48 +00008519#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008520/**
8521 * xmlParseAttribute:
8522 * @ctxt: an XML parser context
8523 * @value: a xmlChar ** used to store the value of the attribute
8524 *
8525 * parse an attribute
8526 *
8527 * [41] Attribute ::= Name Eq AttValue
8528 *
8529 * [ WFC: No External Entity References ]
8530 * Attribute values cannot contain direct or indirect entity references
8531 * to external entities.
8532 *
8533 * [ WFC: No < in Attribute Values ]
8534 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008535 * an attribute value (other than "&lt;") must not contain a <.
8536 *
Owen Taylor3473f882001-02-23 17:55:21 +00008537 * [ VC: Attribute Value Type ]
8538 * The attribute must have been declared; the value must be of the type
8539 * declared for it.
8540 *
8541 * [25] Eq ::= S? '=' S?
8542 *
8543 * With namespace:
8544 *
8545 * [NS 11] Attribute ::= QName Eq AttValue
8546 *
8547 * Also the case QName == xmlns:??? is handled independently as a namespace
8548 * definition.
8549 *
8550 * Returns the attribute name, and the value in *value.
8551 */
8552
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008553const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008554xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008555 const xmlChar *name;
8556 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008557
8558 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008559 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008560 name = xmlParseName(ctxt);
8561 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008562 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008563 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008564 return(NULL);
8565 }
8566
8567 /*
8568 * read the value
8569 */
8570 SKIP_BLANKS;
8571 if (RAW == '=') {
8572 NEXT;
8573 SKIP_BLANKS;
8574 val = xmlParseAttValue(ctxt);
8575 ctxt->instate = XML_PARSER_CONTENT;
8576 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008577 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008578 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008579 return(NULL);
8580 }
8581
8582 /*
8583 * Check that xml:lang conforms to the specification
8584 * No more registered as an error, just generate a warning now
8585 * since this was deprecated in XML second edition
8586 */
8587 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8588 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008589 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8590 "Malformed value for xml:lang : %s\n",
8591 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008592 }
8593 }
8594
8595 /*
8596 * Check that xml:space conforms to the specification
8597 */
8598 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8599 if (xmlStrEqual(val, BAD_CAST "default"))
8600 *(ctxt->space) = 0;
8601 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8602 *(ctxt->space) = 1;
8603 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008604 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008605"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008606 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008607 }
8608 }
8609
8610 *value = val;
8611 return(name);
8612}
8613
8614/**
8615 * xmlParseStartTag:
8616 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008617 *
Owen Taylor3473f882001-02-23 17:55:21 +00008618 * parse a start of tag either for rule element or
8619 * EmptyElement. In both case we don't parse the tag closing chars.
8620 *
8621 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8622 *
8623 * [ WFC: Unique Att Spec ]
8624 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008625 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008626 *
8627 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8628 *
8629 * [ WFC: Unique Att Spec ]
8630 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008631 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008632 *
8633 * With namespace:
8634 *
8635 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8636 *
8637 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8638 *
8639 * Returns the element name parsed
8640 */
8641
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008642const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008643xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008644 const xmlChar *name;
8645 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008646 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008647 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008648 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008649 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008650 int i;
8651
8652 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008653 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008654
8655 name = xmlParseName(ctxt);
8656 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008657 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008658 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008659 return(NULL);
8660 }
8661
8662 /*
8663 * Now parse the attributes, it ends up with the ending
8664 *
8665 * (S Attribute)* S?
8666 */
8667 SKIP_BLANKS;
8668 GROW;
8669
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008670 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008671 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008672 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008673 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008674 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008675
8676 attname = xmlParseAttribute(ctxt, &attvalue);
8677 if ((attname != NULL) && (attvalue != NULL)) {
8678 /*
8679 * [ WFC: Unique Att Spec ]
8680 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008681 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008682 */
8683 for (i = 0; i < nbatts;i += 2) {
8684 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008685 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008686 xmlFree(attvalue);
8687 goto failed;
8688 }
8689 }
Owen Taylor3473f882001-02-23 17:55:21 +00008690 /*
8691 * Add the pair to atts
8692 */
8693 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008694 maxatts = 22; /* allow for 10 attrs by default */
8695 atts = (const xmlChar **)
8696 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008697 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008698 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008699 if (attvalue != NULL)
8700 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008701 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008702 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008703 ctxt->atts = atts;
8704 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008705 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008706 const xmlChar **n;
8707
Owen Taylor3473f882001-02-23 17:55:21 +00008708 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008709 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008710 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008711 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008712 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008713 if (attvalue != NULL)
8714 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008715 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008716 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008717 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008718 ctxt->atts = atts;
8719 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008720 }
8721 atts[nbatts++] = attname;
8722 atts[nbatts++] = attvalue;
8723 atts[nbatts] = NULL;
8724 atts[nbatts + 1] = NULL;
8725 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008726 if (attvalue != NULL)
8727 xmlFree(attvalue);
8728 }
8729
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008730failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008731
Daniel Veillard3772de32002-12-17 10:31:45 +00008732 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008733 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8734 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008735 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008736 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8737 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008738 }
8739 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008740 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8741 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008742 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8743 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008744 break;
8745 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008746 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008747 GROW;
8748 }
8749
8750 /*
8751 * SAX: Start of Element !
8752 */
8753 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008754 (!ctxt->disableSAX)) {
8755 if (nbatts > 0)
8756 ctxt->sax->startElement(ctxt->userData, name, atts);
8757 else
8758 ctxt->sax->startElement(ctxt->userData, name, NULL);
8759 }
Owen Taylor3473f882001-02-23 17:55:21 +00008760
8761 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008762 /* Free only the content strings */
8763 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008764 if (atts[i] != NULL)
8765 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008766 }
8767 return(name);
8768}
8769
8770/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008771 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008772 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008773 * @line: line of the start tag
8774 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008775 *
8776 * parse an end of tag
8777 *
8778 * [42] ETag ::= '</' Name S? '>'
8779 *
8780 * With namespace
8781 *
8782 * [NS 9] ETag ::= '</' QName S? '>'
8783 */
8784
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008785static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008786xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008787 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008788
8789 GROW;
8790 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008791 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008792 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008793 return;
8794 }
8795 SKIP(2);
8796
Daniel Veillard46de64e2002-05-29 08:21:33 +00008797 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008798
8799 /*
8800 * We should definitely be at the ending "S? '>'" part
8801 */
8802 GROW;
8803 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008804 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008805 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008806 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008807 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008808
8809 /*
8810 * [ WFC: Element Type Match ]
8811 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008812 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008813 *
8814 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008815 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008816 if (name == NULL) name = BAD_CAST "unparseable";
8817 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008818 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008819 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008820 }
8821
8822 /*
8823 * SAX: End of Tag
8824 */
8825 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8826 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008827 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008828
Daniel Veillarde57ec792003-09-10 10:50:59 +00008829 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008830 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008831 return;
8832}
8833
8834/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008835 * xmlParseEndTag:
8836 * @ctxt: an XML parser context
8837 *
8838 * parse an end of tag
8839 *
8840 * [42] ETag ::= '</' Name S? '>'
8841 *
8842 * With namespace
8843 *
8844 * [NS 9] ETag ::= '</' QName S? '>'
8845 */
8846
8847void
8848xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008849 xmlParseEndTag1(ctxt, 0);
8850}
Daniel Veillard81273902003-09-30 00:43:48 +00008851#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008852
8853/************************************************************************
8854 * *
8855 * SAX 2 specific operations *
8856 * *
8857 ************************************************************************/
8858
Daniel Veillard0fb18932003-09-07 09:14:37 +00008859/*
8860 * xmlGetNamespace:
8861 * @ctxt: an XML parser context
8862 * @prefix: the prefix to lookup
8863 *
8864 * Lookup the namespace name for the @prefix (which ca be NULL)
Jan Pokornýbb654fe2016-04-13 16:56:07 +02008865 * The prefix must come from the @ctxt->dict dictionary
Daniel Veillard0fb18932003-09-07 09:14:37 +00008866 *
8867 * Returns the namespace name or NULL if not bound
8868 */
8869static const xmlChar *
8870xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8871 int i;
8872
Daniel Veillarde57ec792003-09-10 10:50:59 +00008873 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008874 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008875 if (ctxt->nsTab[i] == prefix) {
8876 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8877 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008878 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008879 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008880 return(NULL);
8881}
8882
8883/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008884 * xmlParseQName:
8885 * @ctxt: an XML parser context
8886 * @prefix: pointer to store the prefix part
8887 *
8888 * parse an XML Namespace QName
8889 *
8890 * [6] QName ::= (Prefix ':')? LocalPart
8891 * [7] Prefix ::= NCName
8892 * [8] LocalPart ::= NCName
8893 *
8894 * Returns the Name parsed or NULL
8895 */
8896
8897static const xmlChar *
8898xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8899 const xmlChar *l, *p;
8900
8901 GROW;
8902
8903 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008904 if (l == NULL) {
8905 if (CUR == ':') {
8906 l = xmlParseName(ctxt);
8907 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008908 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008909 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008910 *prefix = NULL;
8911 return(l);
8912 }
8913 }
8914 return(NULL);
8915 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008916 if (CUR == ':') {
8917 NEXT;
8918 p = l;
8919 l = xmlParseNCName(ctxt);
8920 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008921 xmlChar *tmp;
8922
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008923 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8924 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008925 l = xmlParseNmtoken(ctxt);
8926 if (l == NULL)
8927 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8928 else {
8929 tmp = xmlBuildQName(l, p, NULL, 0);
8930 xmlFree((char *)l);
8931 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008932 p = xmlDictLookup(ctxt->dict, tmp, -1);
8933 if (tmp != NULL) xmlFree(tmp);
8934 *prefix = NULL;
8935 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008936 }
8937 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008938 xmlChar *tmp;
8939
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008940 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8941 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008942 NEXT;
8943 tmp = (xmlChar *) xmlParseName(ctxt);
8944 if (tmp != NULL) {
8945 tmp = xmlBuildQName(tmp, l, NULL, 0);
8946 l = xmlDictLookup(ctxt->dict, tmp, -1);
8947 if (tmp != NULL) xmlFree(tmp);
8948 *prefix = p;
8949 return(l);
8950 }
8951 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8952 l = xmlDictLookup(ctxt->dict, tmp, -1);
8953 if (tmp != NULL) xmlFree(tmp);
8954 *prefix = p;
8955 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008956 }
8957 *prefix = p;
8958 } else
8959 *prefix = NULL;
8960 return(l);
8961}
8962
8963/**
8964 * xmlParseQNameAndCompare:
8965 * @ctxt: an XML parser context
8966 * @name: the localname
8967 * @prefix: the prefix, if any.
8968 *
8969 * parse an XML name and compares for match
8970 * (specialized for endtag parsing)
8971 *
8972 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8973 * and the name for mismatch
8974 */
8975
8976static const xmlChar *
8977xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8978 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008979 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008980 const xmlChar *in;
8981 const xmlChar *ret;
8982 const xmlChar *prefix2;
8983
8984 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8985
8986 GROW;
8987 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008988
Daniel Veillard0fb18932003-09-07 09:14:37 +00008989 cmp = prefix;
8990 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008991 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008992 ++cmp;
8993 }
8994 if ((*cmp == 0) && (*in == ':')) {
8995 in++;
8996 cmp = name;
8997 while (*in != 0 && *in == *cmp) {
8998 ++in;
8999 ++cmp;
9000 }
William M. Brack76e95df2003-10-18 16:20:14 +00009001 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009002 /* success */
9003 ctxt->input->cur = in;
9004 return((const xmlChar*) 1);
9005 }
9006 }
9007 /*
9008 * all strings coms from the dictionary, equality can be done directly
9009 */
9010 ret = xmlParseQName (ctxt, &prefix2);
9011 if ((ret == name) && (prefix == prefix2))
9012 return((const xmlChar*) 1);
9013 return ret;
9014}
9015
9016/**
9017 * xmlParseAttValueInternal:
9018 * @ctxt: an XML parser context
9019 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009020 * @alloc: whether the attribute was reallocated as a new string
9021 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00009022 *
9023 * parse a value for an attribute.
9024 * NOTE: if no normalization is needed, the routine will return pointers
9025 * directly from the data buffer.
9026 *
9027 * 3.3.3 Attribute-Value Normalization:
9028 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009029 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009030 * - a character reference is processed by appending the referenced
9031 * character to the attribute value
9032 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009033 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00009034 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9035 * appending #x20 to the normalized value, except that only a single
9036 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009037 * parsed entity or the literal entity value of an internal parsed entity
9038 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00009039 * If the declared value is not CDATA, then the XML processor must further
9040 * process the normalized attribute value by discarding any leading and
9041 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009042 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009043 * All attributes for which no declaration has been read should be treated
9044 * by a non-validating parser as if declared CDATA.
9045 *
9046 * Returns the AttValue parsed or NULL. The value has to be freed by the
9047 * caller if it was copied, this can be detected by val[*len] == 0.
9048 */
9049
9050static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009051xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9052 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009053{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009054 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009055 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009056 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08009057 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009058
9059 GROW;
9060 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08009061 line = ctxt->input->line;
9062 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009063 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009064 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009065 return (NULL);
9066 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009067 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009068
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009069 /*
9070 * try to handle in this routine the most common case where no
9071 * allocation of a new string is required and where content is
9072 * pure ASCII.
9073 */
9074 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009075 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009076 end = ctxt->input->end;
9077 start = in;
9078 if (in >= end) {
9079 const xmlChar *oldbase = ctxt->input->base;
9080 GROW;
9081 if (oldbase != ctxt->input->base) {
9082 long delta = ctxt->input->base - oldbase;
9083 start = start + delta;
9084 in = in + delta;
9085 }
9086 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009087 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009088 if (normalize) {
9089 /*
9090 * Skip any leading spaces
9091 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009092 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009093 ((*in == 0x20) || (*in == 0x9) ||
9094 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009095 if (*in == 0xA) {
9096 line++; col = 1;
9097 } else {
9098 col++;
9099 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009100 in++;
9101 start = in;
9102 if (in >= end) {
9103 const xmlChar *oldbase = ctxt->input->base;
9104 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009105 if (ctxt->instate == XML_PARSER_EOF)
9106 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009107 if (oldbase != ctxt->input->base) {
9108 long delta = ctxt->input->base - oldbase;
9109 start = start + delta;
9110 in = in + delta;
9111 }
9112 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009113 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9114 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9115 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009116 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009117 return(NULL);
9118 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009119 }
9120 }
9121 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9122 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009123 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009124 if ((*in++ == 0x20) && (*in == 0x20)) break;
9125 if (in >= end) {
9126 const xmlChar *oldbase = ctxt->input->base;
9127 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009128 if (ctxt->instate == XML_PARSER_EOF)
9129 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009130 if (oldbase != ctxt->input->base) {
9131 long delta = ctxt->input->base - oldbase;
9132 start = start + delta;
9133 in = in + delta;
9134 }
9135 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009136 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9137 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9138 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009139 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009140 return(NULL);
9141 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009142 }
9143 }
9144 last = in;
9145 /*
9146 * skip the trailing blanks
9147 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00009148 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009149 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009150 ((*in == 0x20) || (*in == 0x9) ||
9151 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009152 if (*in == 0xA) {
9153 line++, col = 1;
9154 } else {
9155 col++;
9156 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009157 in++;
9158 if (in >= end) {
9159 const xmlChar *oldbase = ctxt->input->base;
9160 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009161 if (ctxt->instate == XML_PARSER_EOF)
9162 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009163 if (oldbase != ctxt->input->base) {
9164 long delta = ctxt->input->base - oldbase;
9165 start = start + delta;
9166 in = in + delta;
9167 last = last + delta;
9168 }
9169 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009170 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9171 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9172 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009173 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009174 return(NULL);
9175 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009176 }
9177 }
Daniel Veillarde17db992012-07-19 11:25:16 +08009178 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9179 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9180 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009181 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009182 return(NULL);
9183 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009184 if (*in != limit) goto need_complex;
9185 } else {
9186 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9187 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9188 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009189 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009190 if (in >= end) {
9191 const xmlChar *oldbase = ctxt->input->base;
9192 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009193 if (ctxt->instate == XML_PARSER_EOF)
9194 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009195 if (oldbase != ctxt->input->base) {
9196 long delta = ctxt->input->base - oldbase;
9197 start = start + delta;
9198 in = in + delta;
9199 }
9200 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009201 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9202 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9203 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009204 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009205 return(NULL);
9206 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009207 }
9208 }
9209 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009210 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9211 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9212 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009213 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009214 return(NULL);
9215 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009216 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009217 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009218 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009219 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009220 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009221 *len = last - start;
9222 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009223 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009224 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009225 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009226 }
9227 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009228 ctxt->input->line = line;
9229 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009230 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009231 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009232need_complex:
9233 if (alloc) *alloc = 1;
9234 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009235}
9236
9237/**
9238 * xmlParseAttribute2:
9239 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009240 * @pref: the element prefix
9241 * @elem: the element name
9242 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009243 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009244 * @len: an int * to save the length of the attribute
9245 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009246 *
9247 * parse an attribute in the new SAX2 framework.
9248 *
9249 * Returns the attribute name, and the value in *value, .
9250 */
9251
9252static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009253xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009254 const xmlChar * pref, const xmlChar * elem,
9255 const xmlChar ** prefix, xmlChar ** value,
9256 int *len, int *alloc)
9257{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009258 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009259 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009260 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009261
9262 *value = NULL;
9263 GROW;
9264 name = xmlParseQName(ctxt, prefix);
9265 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009266 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9267 "error parsing attribute name\n");
9268 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009269 }
9270
9271 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009272 * get the type if needed
9273 */
9274 if (ctxt->attsSpecial != NULL) {
9275 int type;
9276
9277 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009278 pref, elem, *prefix, name);
9279 if (type != 0)
9280 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009281 }
9282
9283 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009284 * read the value
9285 */
9286 SKIP_BLANKS;
9287 if (RAW == '=') {
9288 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009289 SKIP_BLANKS;
9290 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9291 if (normalize) {
9292 /*
9293 * Sometimes a second normalisation pass for spaces is needed
9294 * but that only happens if charrefs or entities refernces
9295 * have been used in the attribute value, i.e. the attribute
9296 * value have been extracted in an allocated string already.
9297 */
9298 if (*alloc) {
9299 const xmlChar *val2;
9300
9301 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009302 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009303 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009304 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009305 }
9306 }
9307 }
9308 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009309 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009310 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9311 "Specification mandate value for attribute %s\n",
9312 name);
9313 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009314 }
9315
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009316 if (*prefix == ctxt->str_xml) {
9317 /*
9318 * Check that xml:lang conforms to the specification
9319 * No more registered as an error, just generate a warning now
9320 * since this was deprecated in XML second edition
9321 */
9322 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9323 internal_val = xmlStrndup(val, *len);
9324 if (!xmlCheckLanguageID(internal_val)) {
9325 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9326 "Malformed value for xml:lang : %s\n",
9327 internal_val, NULL);
9328 }
9329 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009330
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009331 /*
9332 * Check that xml:space conforms to the specification
9333 */
9334 if (xmlStrEqual(name, BAD_CAST "space")) {
9335 internal_val = xmlStrndup(val, *len);
9336 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9337 *(ctxt->space) = 0;
9338 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9339 *(ctxt->space) = 1;
9340 else {
9341 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9342 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9343 internal_val, NULL);
9344 }
9345 }
9346 if (internal_val) {
9347 xmlFree(internal_val);
9348 }
9349 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009350
9351 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009352 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009353}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009354/**
9355 * xmlParseStartTag2:
9356 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009357 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009358 * parse a start of tag either for rule element or
9359 * EmptyElement. In both case we don't parse the tag closing chars.
9360 * This routine is called when running SAX2 parsing
9361 *
9362 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9363 *
9364 * [ WFC: Unique Att Spec ]
9365 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009366 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009367 *
9368 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9369 *
9370 * [ WFC: Unique Att Spec ]
9371 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009372 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009373 *
9374 * With namespace:
9375 *
9376 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9377 *
9378 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9379 *
9380 * Returns the element name parsed
9381 */
9382
9383static const xmlChar *
9384xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009385 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009386 const xmlChar *localname;
9387 const xmlChar *prefix;
9388 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009389 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009390 const xmlChar *nsname;
9391 xmlChar *attvalue;
9392 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009393 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009394 int nratts, nbatts, nbdef;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009395 int i, j, nbNs, attval;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009396 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009397 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009398
9399 if (RAW != '<') return(NULL);
9400 NEXT1;
9401
9402 /*
9403 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9404 * point since the attribute values may be stored as pointers to
9405 * the buffer and calling SHRINK would destroy them !
9406 * The Shrinking is only possible once the full set of attribute
9407 * callbacks have been done.
9408 */
9409 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009410 cur = ctxt->input->cur - ctxt->input->base;
9411 nbatts = 0;
9412 nratts = 0;
9413 nbdef = 0;
9414 nbNs = 0;
9415 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009416 /* Forget any namespaces added during an earlier parse of this element. */
9417 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009418
9419 localname = xmlParseQName(ctxt, &prefix);
9420 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009421 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9422 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009423 return(NULL);
9424 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009425 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009426
9427 /*
9428 * Now parse the attributes, it ends up with the ending
9429 *
9430 * (S Attribute)* S?
9431 */
9432 SKIP_BLANKS;
9433 GROW;
9434
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009435 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009436 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009437 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009438 const xmlChar *q = CUR_PTR;
9439 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009440 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009441
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009442 attname = xmlParseAttribute2(ctxt, prefix, localname,
9443 &aprefix, &attvalue, &len, &alloc);
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009444 if ((attname == NULL) || (attvalue == NULL))
9445 goto next_attr;
9446 if (len < 0) len = xmlStrlen(attvalue);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009447
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009448 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9449 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9450 xmlURIPtr uri;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009451
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009452 if (URL == NULL) {
9453 xmlErrMemory(ctxt, "dictionary allocation failure");
9454 if ((attvalue != NULL) && (alloc != 0))
9455 xmlFree(attvalue);
9456 return(NULL);
9457 }
9458 if (*URL != 0) {
9459 uri = xmlParseURI((const char *) URL);
9460 if (uri == NULL) {
9461 xmlNsErr(ctxt, XML_WAR_NS_URI,
9462 "xmlns: '%s' is not a valid URI\n",
9463 URL, NULL, NULL);
9464 } else {
9465 if (uri->scheme == NULL) {
9466 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9467 "xmlns: URI %s is not absolute\n",
9468 URL, NULL, NULL);
9469 }
9470 xmlFreeURI(uri);
9471 }
Daniel Veillard37334572008-07-31 08:20:02 +00009472 if (URL == ctxt->str_xml_ns) {
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009473 if (attname != ctxt->str_xml) {
9474 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9475 "xml namespace URI cannot be the default namespace\n",
9476 NULL, NULL, NULL);
9477 }
9478 goto next_attr;
9479 }
9480 if ((len == 29) &&
9481 (xmlStrEqual(URL,
9482 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9483 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9484 "reuse of the xmlns namespace name is forbidden\n",
9485 NULL, NULL, NULL);
9486 goto next_attr;
9487 }
9488 }
9489 /*
9490 * check that it's not a defined namespace
9491 */
9492 for (j = 1;j <= nbNs;j++)
9493 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9494 break;
9495 if (j <= nbNs)
9496 xmlErrAttributeDup(ctxt, NULL, attname);
9497 else
9498 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009499
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009500 } else if (aprefix == ctxt->str_xmlns) {
9501 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9502 xmlURIPtr uri;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009503
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009504 if (attname == ctxt->str_xml) {
9505 if (URL != ctxt->str_xml_ns) {
9506 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9507 "xml namespace prefix mapped to wrong URI\n",
9508 NULL, NULL, NULL);
9509 }
9510 /*
9511 * Do not keep a namespace definition node
9512 */
9513 goto next_attr;
9514 }
9515 if (URL == ctxt->str_xml_ns) {
9516 if (attname != ctxt->str_xml) {
9517 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9518 "xml namespace URI mapped to wrong prefix\n",
9519 NULL, NULL, NULL);
9520 }
9521 goto next_attr;
9522 }
9523 if (attname == ctxt->str_xmlns) {
9524 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9525 "redefinition of the xmlns prefix is forbidden\n",
9526 NULL, NULL, NULL);
9527 goto next_attr;
9528 }
9529 if ((len == 29) &&
9530 (xmlStrEqual(URL,
9531 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9532 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9533 "reuse of the xmlns namespace name is forbidden\n",
9534 NULL, NULL, NULL);
9535 goto next_attr;
9536 }
9537 if ((URL == NULL) || (URL[0] == 0)) {
9538 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9539 "xmlns:%s: Empty XML namespace is not allowed\n",
9540 attname, NULL, NULL);
9541 goto next_attr;
9542 } else {
9543 uri = xmlParseURI((const char *) URL);
9544 if (uri == NULL) {
9545 xmlNsErr(ctxt, XML_WAR_NS_URI,
9546 "xmlns:%s: '%s' is not a valid URI\n",
9547 attname, URL, NULL);
9548 } else {
9549 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9550 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9551 "xmlns:%s: URI %s is not absolute\n",
9552 attname, URL, NULL);
9553 }
9554 xmlFreeURI(uri);
9555 }
9556 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009557
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009558 /*
9559 * check that it's not a defined namespace
9560 */
9561 for (j = 1;j <= nbNs;j++)
9562 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9563 break;
9564 if (j <= nbNs)
9565 xmlErrAttributeDup(ctxt, aprefix, attname);
9566 else
9567 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9568
9569 } else {
9570 /*
9571 * Add the pair to atts
9572 */
9573 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9574 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9575 goto next_attr;
9576 }
9577 maxatts = ctxt->maxatts;
9578 atts = ctxt->atts;
9579 }
9580 ctxt->attallocs[nratts++] = alloc;
9581 atts[nbatts++] = attname;
9582 atts[nbatts++] = aprefix;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009583 /*
9584 * The namespace URI field is used temporarily to point at the
9585 * base of the current input buffer for non-alloced attributes.
9586 * When the input buffer is reallocated, all the pointers become
9587 * invalid, but they can be reconstructed later.
9588 */
9589 if (alloc)
9590 atts[nbatts++] = NULL;
9591 else
9592 atts[nbatts++] = ctxt->input->base;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009593 atts[nbatts++] = attvalue;
9594 attvalue += len;
9595 atts[nbatts++] = attvalue;
9596 /*
9597 * tag if some deallocation is needed
9598 */
9599 if (alloc != 0) attval = 1;
9600 attvalue = NULL; /* moved into atts */
9601 }
9602
9603next_attr:
9604 if ((attvalue != NULL) && (alloc != 0)) {
9605 xmlFree(attvalue);
9606 attvalue = NULL;
9607 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009608
9609 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009610 if (ctxt->instate == XML_PARSER_EOF)
9611 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009612 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9613 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009614 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009615 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9616 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009617 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009618 }
9619 SKIP_BLANKS;
9620 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9621 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009622 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009623 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009624 break;
9625 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009626 GROW;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009627 }
9628
9629 /* Reconstruct attribute value pointers. */
9630 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9631 if (atts[i+2] != NULL) {
9632 /*
9633 * Arithmetic on dangling pointers is technically undefined
9634 * behavior, but well...
9635 */
9636 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9637 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9638 atts[i+3] += offset; /* value */
9639 atts[i+4] += offset; /* valuend */
9640 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009641 }
9642
Daniel Veillard0fb18932003-09-07 09:14:37 +00009643 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009644 * The attributes defaulting
9645 */
9646 if (ctxt->attsDefault != NULL) {
9647 xmlDefAttrsPtr defaults;
9648
9649 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9650 if (defaults != NULL) {
9651 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009652 attname = defaults->values[5 * i];
9653 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009654
9655 /*
9656 * special work for namespaces defaulted defs
9657 */
9658 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9659 /*
9660 * check that it's not a defined namespace
9661 */
9662 for (j = 1;j <= nbNs;j++)
9663 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9664 break;
9665 if (j <= nbNs) continue;
9666
9667 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009668 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009669 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009670 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009671 nbNs++;
9672 }
9673 } else if (aprefix == ctxt->str_xmlns) {
9674 /*
9675 * check that it's not a defined namespace
9676 */
9677 for (j = 1;j <= nbNs;j++)
9678 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9679 break;
9680 if (j <= nbNs) continue;
9681
9682 nsname = xmlGetNamespace(ctxt, attname);
9683 if (nsname != defaults->values[2]) {
9684 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009685 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009686 nbNs++;
9687 }
9688 } else {
9689 /*
9690 * check that it's not a defined attribute
9691 */
9692 for (j = 0;j < nbatts;j+=5) {
9693 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9694 break;
9695 }
9696 if (j < nbatts) continue;
9697
9698 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9699 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009700 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009701 }
9702 maxatts = ctxt->maxatts;
9703 atts = ctxt->atts;
9704 }
9705 atts[nbatts++] = attname;
9706 atts[nbatts++] = aprefix;
9707 if (aprefix == NULL)
9708 atts[nbatts++] = NULL;
9709 else
9710 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009711 atts[nbatts++] = defaults->values[5 * i + 2];
9712 atts[nbatts++] = defaults->values[5 * i + 3];
9713 if ((ctxt->standalone == 1) &&
9714 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009715 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009716 "standalone: attribute %s on %s defaulted from external subset\n",
9717 attname, localname);
9718 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009719 nbdef++;
9720 }
9721 }
9722 }
9723 }
9724
Daniel Veillarde70c8772003-11-25 07:21:18 +00009725 /*
9726 * The attributes checkings
9727 */
9728 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009729 /*
9730 * The default namespace does not apply to attribute names.
9731 */
9732 if (atts[i + 1] != NULL) {
9733 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9734 if (nsname == NULL) {
9735 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9736 "Namespace prefix %s for %s on %s is not defined\n",
9737 atts[i + 1], atts[i], localname);
9738 }
9739 atts[i + 2] = nsname;
9740 } else
9741 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009742 /*
9743 * [ WFC: Unique Att Spec ]
9744 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009745 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009746 * As extended by the Namespace in XML REC.
9747 */
9748 for (j = 0; j < i;j += 5) {
9749 if (atts[i] == atts[j]) {
9750 if (atts[i+1] == atts[j+1]) {
9751 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9752 break;
9753 }
9754 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9755 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9756 "Namespaced Attribute %s in '%s' redefined\n",
9757 atts[i], nsname, NULL);
9758 break;
9759 }
9760 }
9761 }
9762 }
9763
Daniel Veillarde57ec792003-09-10 10:50:59 +00009764 nsname = xmlGetNamespace(ctxt, prefix);
9765 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009766 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9767 "Namespace prefix %s on %s is not defined\n",
9768 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009769 }
9770 *pref = prefix;
9771 *URI = nsname;
9772
9773 /*
9774 * SAX: Start of Element !
9775 */
9776 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9777 (!ctxt->disableSAX)) {
9778 if (nbNs > 0)
9779 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9780 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9781 nbatts / 5, nbdef, atts);
9782 else
9783 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9784 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9785 }
9786
9787 /*
9788 * Free up attribute allocated strings if needed
9789 */
9790 if (attval != 0) {
9791 for (i = 3,j = 0; j < nratts;i += 5,j++)
9792 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9793 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009794 }
9795
9796 return(localname);
9797}
9798
9799/**
9800 * xmlParseEndTag2:
9801 * @ctxt: an XML parser context
9802 * @line: line of the start tag
9803 * @nsNr: number of namespaces on the start tag
9804 *
9805 * parse an end of tag
9806 *
9807 * [42] ETag ::= '</' Name S? '>'
9808 *
9809 * With namespace
9810 *
9811 * [NS 9] ETag ::= '</' QName S? '>'
9812 */
9813
9814static void
9815xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009816 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009817 const xmlChar *name;
David Kilzerdb07dd62016-02-12 09:58:29 -08009818 size_t curLength;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009819
9820 GROW;
9821 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009822 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009823 return;
9824 }
9825 SKIP(2);
9826
David Kilzerdb07dd62016-02-12 09:58:29 -08009827 curLength = ctxt->input->end - ctxt->input->cur;
9828 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9829 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9830 if ((curLength >= (size_t)(tlen + 1)) &&
9831 (ctxt->input->cur[tlen] == '>')) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009832 ctxt->input->cur += tlen + 1;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009833 ctxt->input->col += tlen + 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009834 goto done;
9835 }
9836 ctxt->input->cur += tlen;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009837 ctxt->input->col += tlen;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009838 name = (xmlChar*)1;
9839 } else {
9840 if (prefix == NULL)
9841 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9842 else
9843 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9844 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009845
9846 /*
9847 * We should definitely be at the ending "S? '>'" part
9848 */
9849 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009850 if (ctxt->instate == XML_PARSER_EOF)
9851 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009852 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009853 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009854 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009855 } else
9856 NEXT1;
9857
9858 /*
9859 * [ WFC: Element Type Match ]
9860 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009861 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009862 *
9863 */
9864 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009865 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009866 if ((line == 0) && (ctxt->node != NULL))
9867 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009868 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009869 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009870 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009871 }
9872
9873 /*
9874 * SAX: End of Tag
9875 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009876done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009877 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9878 (!ctxt->disableSAX))
9879 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9880
Daniel Veillard0fb18932003-09-07 09:14:37 +00009881 spacePop(ctxt);
9882 if (nsNr != 0)
9883 nsPop(ctxt, nsNr);
9884 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009885}
9886
9887/**
Owen Taylor3473f882001-02-23 17:55:21 +00009888 * xmlParseCDSect:
9889 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009890 *
Owen Taylor3473f882001-02-23 17:55:21 +00009891 * Parse escaped pure raw content.
9892 *
9893 * [18] CDSect ::= CDStart CData CDEnd
9894 *
9895 * [19] CDStart ::= '<![CDATA['
9896 *
9897 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9898 *
9899 * [21] CDEnd ::= ']]>'
9900 */
9901void
9902xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9903 xmlChar *buf = NULL;
9904 int len = 0;
9905 int size = XML_PARSER_BUFFER_SIZE;
9906 int r, rl;
9907 int s, sl;
9908 int cur, l;
9909 int count = 0;
9910
Daniel Veillard8f597c32003-10-06 08:19:27 +00009911 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009912 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009913 SKIP(9);
9914 } else
9915 return;
9916
9917 ctxt->instate = XML_PARSER_CDATA_SECTION;
9918 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009919 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009920 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009921 ctxt->instate = XML_PARSER_CONTENT;
9922 return;
9923 }
9924 NEXTL(rl);
9925 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009926 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009927 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009928 ctxt->instate = XML_PARSER_CONTENT;
9929 return;
9930 }
9931 NEXTL(sl);
9932 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009933 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009934 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009935 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009936 return;
9937 }
William M. Brack871611b2003-10-18 04:53:14 +00009938 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009939 ((r != ']') || (s != ']') || (cur != '>'))) {
9940 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009941 xmlChar *tmp;
9942
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009943 if ((size > XML_MAX_TEXT_LENGTH) &&
9944 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9945 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9946 "CData section too big found", NULL);
9947 xmlFree (buf);
9948 return;
9949 }
9950 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009951 if (tmp == NULL) {
9952 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009953 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009954 return;
9955 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009956 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009957 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009958 }
9959 COPY_BUF(rl,buf,len,r);
9960 r = s;
9961 rl = sl;
9962 s = cur;
9963 sl = l;
9964 count++;
9965 if (count > 50) {
9966 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009967 if (ctxt->instate == XML_PARSER_EOF) {
9968 xmlFree(buf);
9969 return;
9970 }
Owen Taylor3473f882001-02-23 17:55:21 +00009971 count = 0;
9972 }
9973 NEXTL(l);
9974 cur = CUR_CHAR(l);
9975 }
9976 buf[len] = 0;
9977 ctxt->instate = XML_PARSER_CONTENT;
9978 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009979 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009980 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009981 xmlFree(buf);
9982 return;
9983 }
9984 NEXTL(l);
9985
9986 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009987 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009988 */
9989 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9990 if (ctxt->sax->cdataBlock != NULL)
9991 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009992 else if (ctxt->sax->characters != NULL)
9993 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009994 }
9995 xmlFree(buf);
9996}
9997
9998/**
9999 * xmlParseContent:
10000 * @ctxt: an XML parser context
10001 *
10002 * Parse a content:
10003 *
10004 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10005 */
10006
10007void
10008xmlParseContent(xmlParserCtxtPtr ctxt) {
10009 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +000010010 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +000010011 ((RAW != '<') || (NXT(1) != '/')) &&
10012 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010013 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +000010014 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +000010015 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010016
10017 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010018 * First case : a Processing Instruction.
10019 */
Daniel Veillardfdc91562002-07-01 21:52:03 +000010020 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010021 xmlParsePI(ctxt);
10022 }
10023
10024 /*
10025 * Second case : a CDSection
10026 */
Daniel Veillard8f597c32003-10-06 08:19:27 +000010027 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010028 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010029 xmlParseCDSect(ctxt);
10030 }
10031
10032 /*
10033 * Third case : a comment
10034 */
Daniel Veillard21a0f912001-02-25 19:54:14 +000010035 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010036 (NXT(2) == '-') && (NXT(3) == '-')) {
10037 xmlParseComment(ctxt);
10038 ctxt->instate = XML_PARSER_CONTENT;
10039 }
10040
10041 /*
10042 * Fourth case : a sub-element.
10043 */
Daniel Veillard21a0f912001-02-25 19:54:14 +000010044 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +000010045 xmlParseElement(ctxt);
10046 }
10047
10048 /*
10049 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010050 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +000010051 */
10052
Daniel Veillard21a0f912001-02-25 19:54:14 +000010053 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +000010054 xmlParseReference(ctxt);
10055 }
10056
10057 /*
10058 * Last case, text. Note that References are handled directly.
10059 */
10060 else {
10061 xmlParseCharData(ctxt, 0);
10062 }
10063
10064 GROW;
10065 /*
10066 * Pop-up of finished entities.
10067 */
Daniel Veillard561b7f82002-03-20 21:55:57 +000010068 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +000010069 xmlPopInput(ctxt);
10070 SHRINK;
10071
Daniel Veillardfdc91562002-07-01 21:52:03 +000010072 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010073 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10074 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080010075 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010076 break;
10077 }
10078 }
10079}
10080
10081/**
10082 * xmlParseElement:
10083 * @ctxt: an XML parser context
10084 *
10085 * parse an XML element, this is highly recursive
10086 *
10087 * [39] element ::= EmptyElemTag | STag content ETag
10088 *
10089 * [ WFC: Element Type Match ]
10090 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010091 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +000010092 *
Owen Taylor3473f882001-02-23 17:55:21 +000010093 */
10094
10095void
10096xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +000010097 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010098 const xmlChar *prefix = NULL;
10099 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010100 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +080010101 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010102 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +000010103 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +000010104
Daniel Veillard8915c152008-08-26 13:05:34 +000010105 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10106 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10107 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10108 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10109 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +080010110 xmlHaltParser(ctxt);
Daniel Veillard4a9fe382006-09-19 12:44:35 +000010111 return;
10112 }
10113
Owen Taylor3473f882001-02-23 17:55:21 +000010114 /* Capture start position */
10115 if (ctxt->record_info) {
10116 node_info.begin_pos = ctxt->input->consumed +
10117 (CUR_PTR - ctxt->input->base);
10118 node_info.begin_line = ctxt->input->line;
10119 }
10120
10121 if (ctxt->spaceNr == 0)
10122 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010123 else if (*ctxt->space == -2)
10124 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +000010125 else
10126 spacePush(ctxt, *ctxt->space);
10127
Daniel Veillard6c5b2d32003-03-27 14:55:52 +000010128 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +000010129#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010130 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010131#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010132 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010133#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010134 else
10135 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010136#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080010137 if (ctxt->instate == XML_PARSER_EOF)
10138 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010139 if (name == NULL) {
10140 spacePop(ctxt);
10141 return;
10142 }
10143 namePush(ctxt, name);
10144 ret = ctxt->node;
10145
Daniel Veillard4432df22003-09-28 18:58:27 +000010146#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010147 /*
10148 * [ VC: Root Element Type ]
10149 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010150 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +000010151 */
10152 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10153 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10154 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010155#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010156
10157 /*
10158 * Check for an Empty Element.
10159 */
10160 if ((RAW == '/') && (NXT(1) == '>')) {
10161 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010162 if (ctxt->sax2) {
10163 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10164 (!ctxt->disableSAX))
10165 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010166#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010167 } else {
10168 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10169 (!ctxt->disableSAX))
10170 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010171#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010172 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010173 namePop(ctxt);
10174 spacePop(ctxt);
10175 if (nsNr != ctxt->nsNr)
10176 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010177 if ( ret != NULL && ctxt->record_info ) {
10178 node_info.end_pos = ctxt->input->consumed +
10179 (CUR_PTR - ctxt->input->base);
10180 node_info.end_line = ctxt->input->line;
10181 node_info.node = ret;
10182 xmlParserAddNodeInfo(ctxt, &node_info);
10183 }
10184 return;
10185 }
10186 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010187 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010188 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010189 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10190 "Couldn't find end of Start Tag %s line %d\n",
10191 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010192
10193 /*
10194 * end of parsing of this node.
10195 */
10196 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010197 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010198 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010199 if (nsNr != ctxt->nsNr)
10200 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010201
10202 /*
10203 * Capture end position and add node
10204 */
10205 if ( ret != NULL && ctxt->record_info ) {
10206 node_info.end_pos = ctxt->input->consumed +
10207 (CUR_PTR - ctxt->input->base);
10208 node_info.end_line = ctxt->input->line;
10209 node_info.node = ret;
10210 xmlParserAddNodeInfo(ctxt, &node_info);
10211 }
10212 return;
10213 }
10214
10215 /*
10216 * Parse the content of the element:
10217 */
10218 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010219 if (ctxt->instate == XML_PARSER_EOF)
10220 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010221 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010222 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010223 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010224 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010225
10226 /*
10227 * end of parsing of this node.
10228 */
10229 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010230 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010231 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010232 if (nsNr != ctxt->nsNr)
10233 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010234 return;
10235 }
10236
10237 /*
10238 * parse the end of tag: '</' should be here.
10239 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010240 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010241 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010242 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010243 }
10244#ifdef LIBXML_SAX1_ENABLED
10245 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010246 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010247#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010248
10249 /*
10250 * Capture end position and add node
10251 */
10252 if ( ret != NULL && ctxt->record_info ) {
10253 node_info.end_pos = ctxt->input->consumed +
10254 (CUR_PTR - ctxt->input->base);
10255 node_info.end_line = ctxt->input->line;
10256 node_info.node = ret;
10257 xmlParserAddNodeInfo(ctxt, &node_info);
10258 }
10259}
10260
10261/**
10262 * xmlParseVersionNum:
10263 * @ctxt: an XML parser context
10264 *
10265 * parse the XML version value.
10266 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010267 * [26] VersionNum ::= '1.' [0-9]+
10268 *
10269 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010270 *
10271 * Returns the string giving the XML version number, or NULL
10272 */
10273xmlChar *
10274xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10275 xmlChar *buf = NULL;
10276 int len = 0;
10277 int size = 10;
10278 xmlChar cur;
10279
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010280 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010281 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010282 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010283 return(NULL);
10284 }
10285 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010286 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010287 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010288 return(NULL);
10289 }
10290 buf[len++] = cur;
10291 NEXT;
10292 cur=CUR;
10293 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010294 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010295 return(NULL);
10296 }
10297 buf[len++] = cur;
10298 NEXT;
10299 cur=CUR;
10300 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010301 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010302 xmlChar *tmp;
10303
Owen Taylor3473f882001-02-23 17:55:21 +000010304 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010305 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10306 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010307 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010308 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010309 return(NULL);
10310 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010311 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010312 }
10313 buf[len++] = cur;
10314 NEXT;
10315 cur=CUR;
10316 }
10317 buf[len] = 0;
10318 return(buf);
10319}
10320
10321/**
10322 * xmlParseVersionInfo:
10323 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010324 *
Owen Taylor3473f882001-02-23 17:55:21 +000010325 * parse the XML version.
10326 *
10327 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010328 *
Owen Taylor3473f882001-02-23 17:55:21 +000010329 * [25] Eq ::= S? '=' S?
10330 *
10331 * Returns the version string, e.g. "1.0"
10332 */
10333
10334xmlChar *
10335xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10336 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010337
Daniel Veillarda07050d2003-10-19 14:46:32 +000010338 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010339 SKIP(7);
10340 SKIP_BLANKS;
10341 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010342 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010343 return(NULL);
10344 }
10345 NEXT;
10346 SKIP_BLANKS;
10347 if (RAW == '"') {
10348 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010349 version = xmlParseVersionNum(ctxt);
10350 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010351 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010352 } else
10353 NEXT;
10354 } else if (RAW == '\''){
10355 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010356 version = xmlParseVersionNum(ctxt);
10357 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010358 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010359 } else
10360 NEXT;
10361 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010362 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010363 }
10364 }
10365 return(version);
10366}
10367
10368/**
10369 * xmlParseEncName:
10370 * @ctxt: an XML parser context
10371 *
10372 * parse the XML encoding name
10373 *
10374 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10375 *
10376 * Returns the encoding name value or NULL
10377 */
10378xmlChar *
10379xmlParseEncName(xmlParserCtxtPtr ctxt) {
10380 xmlChar *buf = NULL;
10381 int len = 0;
10382 int size = 10;
10383 xmlChar cur;
10384
10385 cur = CUR;
10386 if (((cur >= 'a') && (cur <= 'z')) ||
10387 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010388 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010389 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010390 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010391 return(NULL);
10392 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010393
Owen Taylor3473f882001-02-23 17:55:21 +000010394 buf[len++] = cur;
10395 NEXT;
10396 cur = CUR;
10397 while (((cur >= 'a') && (cur <= 'z')) ||
10398 ((cur >= 'A') && (cur <= 'Z')) ||
10399 ((cur >= '0') && (cur <= '9')) ||
10400 (cur == '.') || (cur == '_') ||
10401 (cur == '-')) {
10402 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010403 xmlChar *tmp;
10404
Owen Taylor3473f882001-02-23 17:55:21 +000010405 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010406 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10407 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010408 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010409 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010410 return(NULL);
10411 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010412 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010413 }
10414 buf[len++] = cur;
10415 NEXT;
10416 cur = CUR;
10417 if (cur == 0) {
10418 SHRINK;
10419 GROW;
10420 cur = CUR;
10421 }
10422 }
10423 buf[len] = 0;
10424 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010425 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010426 }
10427 return(buf);
10428}
10429
10430/**
10431 * xmlParseEncodingDecl:
10432 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010433 *
Owen Taylor3473f882001-02-23 17:55:21 +000010434 * parse the XML encoding declaration
10435 *
10436 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10437 *
10438 * this setups the conversion filters.
10439 *
10440 * Returns the encoding value or NULL
10441 */
10442
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010443const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010444xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10445 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010446
10447 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010448 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010449 SKIP(8);
10450 SKIP_BLANKS;
10451 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010452 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010453 return(NULL);
10454 }
10455 NEXT;
10456 SKIP_BLANKS;
10457 if (RAW == '"') {
10458 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010459 encoding = xmlParseEncName(ctxt);
10460 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010461 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010462 xmlFree((xmlChar *) encoding);
10463 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010464 } else
10465 NEXT;
10466 } else if (RAW == '\''){
10467 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010468 encoding = xmlParseEncName(ctxt);
10469 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010470 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010471 xmlFree((xmlChar *) encoding);
10472 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010473 } else
10474 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010475 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010476 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010477 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010478
10479 /*
10480 * Non standard parsing, allowing the user to ignore encoding
10481 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010482 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10483 xmlFree((xmlChar *) encoding);
10484 return(NULL);
10485 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010486
Daniel Veillard6b621b82003-08-11 15:03:34 +000010487 /*
10488 * UTF-16 encoding stwich has already taken place at this stage,
10489 * more over the little-endian/big-endian selection is already done
10490 */
10491 if ((encoding != NULL) &&
10492 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10493 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010494 /*
10495 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010496 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010497 * document is apparently UTF-8 compatible, then raise an
10498 * encoding mismatch fatal error
10499 */
10500 if ((ctxt->encoding == NULL) &&
10501 (ctxt->input->buf != NULL) &&
10502 (ctxt->input->buf->encoder == NULL)) {
10503 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10504 "Document labelled UTF-16 but has UTF-8 content\n");
10505 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010506 if (ctxt->encoding != NULL)
10507 xmlFree((xmlChar *) ctxt->encoding);
10508 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010509 }
10510 /*
10511 * UTF-8 encoding is handled natively
10512 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010513 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010514 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10515 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010516 if (ctxt->encoding != NULL)
10517 xmlFree((xmlChar *) ctxt->encoding);
10518 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010519 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010520 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010521 xmlCharEncodingHandlerPtr handler;
10522
10523 if (ctxt->input->encoding != NULL)
10524 xmlFree((xmlChar *) ctxt->input->encoding);
10525 ctxt->input->encoding = encoding;
10526
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010527 handler = xmlFindCharEncodingHandler((const char *) encoding);
10528 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010529 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10530 /* failed to convert */
10531 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10532 return(NULL);
10533 }
Owen Taylor3473f882001-02-23 17:55:21 +000010534 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010535 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010536 "Unsupported encoding %s\n", encoding);
10537 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010538 }
10539 }
10540 }
10541 return(encoding);
10542}
10543
10544/**
10545 * xmlParseSDDecl:
10546 * @ctxt: an XML parser context
10547 *
10548 * parse the XML standalone declaration
10549 *
10550 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010551 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010552 *
10553 * [ VC: Standalone Document Declaration ]
10554 * TODO The standalone document declaration must have the value "no"
10555 * if any external markup declarations contain declarations of:
10556 * - attributes with default values, if elements to which these
10557 * attributes apply appear in the document without specifications
10558 * of values for these attributes, or
10559 * - entities (other than amp, lt, gt, apos, quot), if references
10560 * to those entities appear in the document, or
10561 * - attributes with values subject to normalization, where the
10562 * attribute appears in the document with a value which will change
10563 * as a result of normalization, or
10564 * - element types with element content, if white space occurs directly
10565 * within any instance of those types.
10566 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010567 * Returns:
10568 * 1 if standalone="yes"
10569 * 0 if standalone="no"
10570 * -2 if standalone attribute is missing or invalid
10571 * (A standalone value of -2 means that the XML declaration was found,
10572 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010573 */
10574
10575int
10576xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010577 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010578
10579 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010580 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010581 SKIP(10);
10582 SKIP_BLANKS;
10583 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010584 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010585 return(standalone);
10586 }
10587 NEXT;
10588 SKIP_BLANKS;
10589 if (RAW == '\''){
10590 NEXT;
10591 if ((RAW == 'n') && (NXT(1) == 'o')) {
10592 standalone = 0;
10593 SKIP(2);
10594 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10595 (NXT(2) == 's')) {
10596 standalone = 1;
10597 SKIP(3);
10598 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010599 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010600 }
10601 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010602 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010603 } else
10604 NEXT;
10605 } else if (RAW == '"'){
10606 NEXT;
10607 if ((RAW == 'n') && (NXT(1) == 'o')) {
10608 standalone = 0;
10609 SKIP(2);
10610 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10611 (NXT(2) == 's')) {
10612 standalone = 1;
10613 SKIP(3);
10614 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010615 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010616 }
10617 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010618 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010619 } else
10620 NEXT;
10621 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010622 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010623 }
10624 }
10625 return(standalone);
10626}
10627
10628/**
10629 * xmlParseXMLDecl:
10630 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010631 *
Owen Taylor3473f882001-02-23 17:55:21 +000010632 * parse an XML declaration header
10633 *
10634 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10635 */
10636
10637void
10638xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10639 xmlChar *version;
10640
10641 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010642 * This value for standalone indicates that the document has an
10643 * XML declaration but it does not have a standalone attribute.
10644 * It will be overwritten later if a standalone attribute is found.
10645 */
10646 ctxt->input->standalone = -2;
10647
10648 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010649 * We know that '<?xml' is here.
10650 */
10651 SKIP(5);
10652
William M. Brack76e95df2003-10-18 16:20:14 +000010653 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010654 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10655 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010656 }
10657 SKIP_BLANKS;
10658
10659 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010660 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010661 */
10662 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010663 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010664 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010665 } else {
10666 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10667 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010668 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010669 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010670 if (ctxt->options & XML_PARSE_OLD10) {
10671 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10672 "Unsupported version '%s'\n",
10673 version);
10674 } else {
10675 if ((version[0] == '1') && ((version[1] == '.'))) {
10676 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10677 "Unsupported version '%s'\n",
10678 version, NULL);
10679 } else {
10680 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10681 "Unsupported version '%s'\n",
10682 version);
10683 }
10684 }
Daniel Veillard19840942001-11-29 16:11:38 +000010685 }
10686 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010687 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010688 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010689 }
Owen Taylor3473f882001-02-23 17:55:21 +000010690
10691 /*
10692 * We may have the encoding declaration
10693 */
William M. Brack76e95df2003-10-18 16:20:14 +000010694 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010695 if ((RAW == '?') && (NXT(1) == '>')) {
10696 SKIP(2);
10697 return;
10698 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010699 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010700 }
10701 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010702 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10703 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010704 /*
10705 * The XML REC instructs us to stop parsing right here
10706 */
10707 return;
10708 }
10709
10710 /*
10711 * We may have the standalone status.
10712 */
William M. Brack76e95df2003-10-18 16:20:14 +000010713 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010714 if ((RAW == '?') && (NXT(1) == '>')) {
10715 SKIP(2);
10716 return;
10717 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010718 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010719 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010720
10721 /*
10722 * We can grow the input buffer freely at that point
10723 */
10724 GROW;
10725
Owen Taylor3473f882001-02-23 17:55:21 +000010726 SKIP_BLANKS;
10727 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10728
10729 SKIP_BLANKS;
10730 if ((RAW == '?') && (NXT(1) == '>')) {
10731 SKIP(2);
10732 } else if (RAW == '>') {
10733 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010734 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010735 NEXT;
10736 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010737 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010738 MOVETO_ENDTAG(CUR_PTR);
10739 NEXT;
10740 }
10741}
10742
10743/**
10744 * xmlParseMisc:
10745 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010746 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010747 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010748 *
10749 * [27] Misc ::= Comment | PI | S
10750 */
10751
10752void
10753xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010754 while ((ctxt->instate != XML_PARSER_EOF) &&
10755 (((RAW == '<') && (NXT(1) == '?')) ||
10756 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10757 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010758 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010759 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010760 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010761 NEXT;
10762 } else
10763 xmlParseComment(ctxt);
10764 }
10765}
10766
10767/**
10768 * xmlParseDocument:
10769 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010770 *
Owen Taylor3473f882001-02-23 17:55:21 +000010771 * parse an XML document (and build a tree if using the standard SAX
10772 * interface).
10773 *
10774 * [1] document ::= prolog element Misc*
10775 *
10776 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10777 *
10778 * Returns 0, -1 in case of error. the parser context is augmented
10779 * as a result of the parsing.
10780 */
10781
10782int
10783xmlParseDocument(xmlParserCtxtPtr ctxt) {
10784 xmlChar start[4];
10785 xmlCharEncoding enc;
10786
10787 xmlInitParser();
10788
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010789 if ((ctxt == NULL) || (ctxt->input == NULL))
10790 return(-1);
10791
Owen Taylor3473f882001-02-23 17:55:21 +000010792 GROW;
10793
10794 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010795 * SAX: detecting the level.
10796 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010797 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010798
10799 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010800 * SAX: beginning of the document processing.
10801 */
10802 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10803 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010804 if (ctxt->instate == XML_PARSER_EOF)
10805 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010806
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010807 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010808 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010809 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010810 * Get the 4 first bytes and decode the charset
10811 * if enc != XML_CHAR_ENCODING_NONE
10812 * plug some encoding conversion routines.
10813 */
10814 start[0] = RAW;
10815 start[1] = NXT(1);
10816 start[2] = NXT(2);
10817 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010818 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010819 if (enc != XML_CHAR_ENCODING_NONE) {
10820 xmlSwitchEncoding(ctxt, enc);
10821 }
Owen Taylor3473f882001-02-23 17:55:21 +000010822 }
10823
10824
10825 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010826 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010827 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010828 }
10829
10830 /*
10831 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010832 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010833 * than just the first line, unless the amount of data is really
10834 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010835 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010836 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10837 GROW;
10838 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010839 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010840
10841 /*
10842 * Note that we will switch encoding on the fly.
10843 */
10844 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010845 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10846 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010847 /*
10848 * The XML REC instructs us to stop parsing right here
10849 */
10850 return(-1);
10851 }
10852 ctxt->standalone = ctxt->input->standalone;
10853 SKIP_BLANKS;
10854 } else {
10855 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10856 }
10857 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10858 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010859 if (ctxt->instate == XML_PARSER_EOF)
10860 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010861 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10862 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10863 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10864 }
Owen Taylor3473f882001-02-23 17:55:21 +000010865
10866 /*
10867 * The Misc part of the Prolog
10868 */
10869 GROW;
10870 xmlParseMisc(ctxt);
10871
10872 /*
10873 * Then possibly doc type declaration(s) and more Misc
10874 * (doctypedecl Misc*)?
10875 */
10876 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010877 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010878
10879 ctxt->inSubset = 1;
10880 xmlParseDocTypeDecl(ctxt);
10881 if (RAW == '[') {
10882 ctxt->instate = XML_PARSER_DTD;
10883 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010884 if (ctxt->instate == XML_PARSER_EOF)
10885 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010886 }
10887
10888 /*
10889 * Create and update the external subset.
10890 */
10891 ctxt->inSubset = 2;
10892 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10893 (!ctxt->disableSAX))
10894 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10895 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010896 if (ctxt->instate == XML_PARSER_EOF)
10897 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010898 ctxt->inSubset = 0;
10899
Daniel Veillardac4118d2008-01-11 05:27:32 +000010900 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010901
10902 ctxt->instate = XML_PARSER_PROLOG;
10903 xmlParseMisc(ctxt);
10904 }
10905
10906 /*
10907 * Time to start parsing the tree itself
10908 */
10909 GROW;
10910 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010911 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10912 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010913 } else {
10914 ctxt->instate = XML_PARSER_CONTENT;
10915 xmlParseElement(ctxt);
10916 ctxt->instate = XML_PARSER_EPILOG;
10917
10918
10919 /*
10920 * The Misc part at the end
10921 */
10922 xmlParseMisc(ctxt);
10923
Daniel Veillard561b7f82002-03-20 21:55:57 +000010924 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010925 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010926 }
10927 ctxt->instate = XML_PARSER_EOF;
10928 }
10929
10930 /*
10931 * SAX: end of the document processing.
10932 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010933 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010934 ctxt->sax->endDocument(ctxt->userData);
10935
Daniel Veillard5997aca2002-03-18 18:36:20 +000010936 /*
10937 * Remove locally kept entity definitions if the tree was not built
10938 */
10939 if ((ctxt->myDoc != NULL) &&
10940 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10941 xmlFreeDoc(ctxt->myDoc);
10942 ctxt->myDoc = NULL;
10943 }
10944
Daniel Veillardae0765b2008-07-31 19:54:59 +000010945 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10946 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10947 if (ctxt->valid)
10948 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10949 if (ctxt->nsWellFormed)
10950 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10951 if (ctxt->options & XML_PARSE_OLD10)
10952 ctxt->myDoc->properties |= XML_DOC_OLD10;
10953 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010954 if (! ctxt->wellFormed) {
10955 ctxt->valid = 0;
10956 return(-1);
10957 }
Owen Taylor3473f882001-02-23 17:55:21 +000010958 return(0);
10959}
10960
10961/**
10962 * xmlParseExtParsedEnt:
10963 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010964 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010965 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010966 * An external general parsed entity is well-formed if it matches the
10967 * production labeled extParsedEnt.
10968 *
10969 * [78] extParsedEnt ::= TextDecl? content
10970 *
10971 * Returns 0, -1 in case of error. the parser context is augmented
10972 * as a result of the parsing.
10973 */
10974
10975int
10976xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10977 xmlChar start[4];
10978 xmlCharEncoding enc;
10979
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010980 if ((ctxt == NULL) || (ctxt->input == NULL))
10981 return(-1);
10982
Owen Taylor3473f882001-02-23 17:55:21 +000010983 xmlDefaultSAXHandlerInit();
10984
Daniel Veillard309f81d2003-09-23 09:02:53 +000010985 xmlDetectSAX2(ctxt);
10986
Owen Taylor3473f882001-02-23 17:55:21 +000010987 GROW;
10988
10989 /*
10990 * SAX: beginning of the document processing.
10991 */
10992 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10993 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10994
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010995 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010996 * Get the 4 first bytes and decode the charset
10997 * if enc != XML_CHAR_ENCODING_NONE
10998 * plug some encoding conversion routines.
10999 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011000 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11001 start[0] = RAW;
11002 start[1] = NXT(1);
11003 start[2] = NXT(2);
11004 start[3] = NXT(3);
11005 enc = xmlDetectCharEncoding(start, 4);
11006 if (enc != XML_CHAR_ENCODING_NONE) {
11007 xmlSwitchEncoding(ctxt, enc);
11008 }
Owen Taylor3473f882001-02-23 17:55:21 +000011009 }
11010
11011
11012 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011013 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011014 }
11015
11016 /*
11017 * Check for the XMLDecl in the Prolog.
11018 */
11019 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000011020 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011021
11022 /*
11023 * Note that we will switch encoding on the fly.
11024 */
11025 xmlParseXMLDecl(ctxt);
11026 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11027 /*
11028 * The XML REC instructs us to stop parsing right here
11029 */
11030 return(-1);
11031 }
11032 SKIP_BLANKS;
11033 } else {
11034 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11035 }
11036 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11037 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011038 if (ctxt->instate == XML_PARSER_EOF)
11039 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000011040
11041 /*
11042 * Doing validity checking on chunk doesn't make sense
11043 */
11044 ctxt->instate = XML_PARSER_CONTENT;
11045 ctxt->validate = 0;
11046 ctxt->loadsubset = 0;
11047 ctxt->depth = 0;
11048
11049 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011050 if (ctxt->instate == XML_PARSER_EOF)
11051 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011052
Owen Taylor3473f882001-02-23 17:55:21 +000011053 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011054 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011055 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011056 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011057 }
11058
11059 /*
11060 * SAX: end of the document processing.
11061 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011062 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011063 ctxt->sax->endDocument(ctxt->userData);
11064
11065 if (! ctxt->wellFormed) return(-1);
11066 return(0);
11067}
11068
Daniel Veillard73b013f2003-09-30 12:36:01 +000011069#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011070/************************************************************************
11071 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011072 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000011073 * *
11074 ************************************************************************/
11075
11076/**
11077 * xmlParseLookupSequence:
11078 * @ctxt: an XML parser context
11079 * @first: the first char to lookup
11080 * @next: the next char to lookup or zero
11081 * @third: the next char to lookup or zero
11082 *
11083 * Try to find if a sequence (first, next, third) or just (first next) or
11084 * (first) is available in the input stream.
11085 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11086 * to avoid rescanning sequences of bytes, it DOES change the state of the
11087 * parser, do not use liberally.
11088 *
11089 * Returns the index to the current parsing point if the full sequence
11090 * is available, -1 otherwise.
11091 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011092static int
Owen Taylor3473f882001-02-23 17:55:21 +000011093xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11094 xmlChar next, xmlChar third) {
11095 int base, len;
11096 xmlParserInputPtr in;
11097 const xmlChar *buf;
11098
11099 in = ctxt->input;
11100 if (in == NULL) return(-1);
11101 base = in->cur - in->base;
11102 if (base < 0) return(-1);
11103 if (ctxt->checkIndex > base)
11104 base = ctxt->checkIndex;
11105 if (in->buf == NULL) {
11106 buf = in->base;
11107 len = in->length;
11108 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011109 buf = xmlBufContent(in->buf->buffer);
11110 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000011111 }
11112 /* take into account the sequence length */
11113 if (third) len -= 2;
11114 else if (next) len --;
11115 for (;base < len;base++) {
11116 if (buf[base] == first) {
11117 if (third != 0) {
11118 if ((buf[base + 1] != next) ||
11119 (buf[base + 2] != third)) continue;
11120 } else if (next != 0) {
11121 if (buf[base + 1] != next) continue;
11122 }
11123 ctxt->checkIndex = 0;
11124#ifdef DEBUG_PUSH
11125 if (next == 0)
11126 xmlGenericError(xmlGenericErrorContext,
11127 "PP: lookup '%c' found at %d\n",
11128 first, base);
11129 else if (third == 0)
11130 xmlGenericError(xmlGenericErrorContext,
11131 "PP: lookup '%c%c' found at %d\n",
11132 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011133 else
Owen Taylor3473f882001-02-23 17:55:21 +000011134 xmlGenericError(xmlGenericErrorContext,
11135 "PP: lookup '%c%c%c' found at %d\n",
11136 first, next, third, base);
11137#endif
11138 return(base - (in->cur - in->base));
11139 }
11140 }
11141 ctxt->checkIndex = base;
11142#ifdef DEBUG_PUSH
11143 if (next == 0)
11144 xmlGenericError(xmlGenericErrorContext,
11145 "PP: lookup '%c' failed\n", first);
11146 else if (third == 0)
11147 xmlGenericError(xmlGenericErrorContext,
11148 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011149 else
Owen Taylor3473f882001-02-23 17:55:21 +000011150 xmlGenericError(xmlGenericErrorContext,
11151 "PP: lookup '%c%c%c' failed\n", first, next, third);
11152#endif
11153 return(-1);
11154}
11155
11156/**
Daniel Veillarda880b122003-04-21 21:36:41 +000011157 * xmlParseGetLasts:
11158 * @ctxt: an XML parser context
11159 * @lastlt: pointer to store the last '<' from the input
11160 * @lastgt: pointer to store the last '>' from the input
11161 *
11162 * Lookup the last < and > in the current chunk
11163 */
11164static void
11165xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11166 const xmlChar **lastgt) {
11167 const xmlChar *tmp;
11168
11169 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11170 xmlGenericError(xmlGenericErrorContext,
11171 "Internal error: xmlParseGetLasts\n");
11172 return;
11173 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011174 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011175 tmp = ctxt->input->end;
11176 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000011177 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000011178 if (tmp < ctxt->input->base) {
11179 *lastlt = NULL;
11180 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011181 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011182 *lastlt = tmp;
11183 tmp++;
11184 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11185 if (*tmp == '\'') {
11186 tmp++;
11187 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11188 if (tmp < ctxt->input->end) tmp++;
11189 } else if (*tmp == '"') {
11190 tmp++;
11191 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11192 if (tmp < ctxt->input->end) tmp++;
11193 } else
11194 tmp++;
11195 }
11196 if (tmp < ctxt->input->end)
11197 *lastgt = tmp;
11198 else {
11199 tmp = *lastlt;
11200 tmp--;
11201 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11202 if (tmp >= ctxt->input->base)
11203 *lastgt = tmp;
11204 else
11205 *lastgt = NULL;
11206 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011207 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011208 } else {
11209 *lastlt = NULL;
11210 *lastgt = NULL;
11211 }
11212}
11213/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011214 * xmlCheckCdataPush:
David Kilzer4f8606c2016-01-05 13:38:09 -080011215 * @cur: pointer to the block of characters
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011216 * @len: length of the block in bytes
David Kilzer4f8606c2016-01-05 13:38:09 -080011217 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011218 *
11219 * Check that the block of characters is okay as SCdata content [20]
11220 *
11221 * Returns the number of bytes to pass if okay, a negative index where an
11222 * UTF-8 error occured otherwise
11223 */
11224static int
David Kilzer4f8606c2016-01-05 13:38:09 -080011225xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011226 int ix;
11227 unsigned char c;
11228 int codepoint;
11229
11230 if ((utf == NULL) || (len <= 0))
11231 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011232
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011233 for (ix = 0; ix < len;) { /* string is 0-terminated */
11234 c = utf[ix];
11235 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11236 if (c >= 0x20)
11237 ix++;
11238 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11239 ix++;
11240 else
11241 return(-ix);
11242 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011243 if (ix + 2 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011244 if ((utf[ix+1] & 0xc0 ) != 0x80)
11245 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011246 codepoint = (utf[ix] & 0x1f) << 6;
11247 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011248 if (!xmlIsCharQ(codepoint))
11249 return(-ix);
11250 ix += 2;
11251 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011252 if (ix + 3 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011253 if (((utf[ix+1] & 0xc0) != 0x80) ||
11254 ((utf[ix+2] & 0xc0) != 0x80))
11255 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011256 codepoint = (utf[ix] & 0xf) << 12;
11257 codepoint |= (utf[ix+1] & 0x3f) << 6;
11258 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011259 if (!xmlIsCharQ(codepoint))
11260 return(-ix);
11261 ix += 3;
11262 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011263 if (ix + 4 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011264 if (((utf[ix+1] & 0xc0) != 0x80) ||
11265 ((utf[ix+2] & 0xc0) != 0x80) ||
11266 ((utf[ix+3] & 0xc0) != 0x80))
11267 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011268 codepoint = (utf[ix] & 0x7) << 18;
11269 codepoint |= (utf[ix+1] & 0x3f) << 12;
11270 codepoint |= (utf[ix+2] & 0x3f) << 6;
11271 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011272 if (!xmlIsCharQ(codepoint))
11273 return(-ix);
11274 ix += 4;
11275 } else /* unknown encoding */
11276 return(-ix);
11277 }
11278 return(ix);
11279}
11280
11281/**
Owen Taylor3473f882001-02-23 17:55:21 +000011282 * xmlParseTryOrFinish:
11283 * @ctxt: an XML parser context
11284 * @terminate: last chunk indicator
11285 *
11286 * Try to progress on parsing
11287 *
11288 * Returns zero if no parsing was possible
11289 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011290static int
Owen Taylor3473f882001-02-23 17:55:21 +000011291xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11292 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011293 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011294 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011295 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011296
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011297 if (ctxt->input == NULL)
11298 return(0);
11299
Owen Taylor3473f882001-02-23 17:55:21 +000011300#ifdef DEBUG_PUSH
11301 switch (ctxt->instate) {
11302 case XML_PARSER_EOF:
11303 xmlGenericError(xmlGenericErrorContext,
11304 "PP: try EOF\n"); break;
11305 case XML_PARSER_START:
11306 xmlGenericError(xmlGenericErrorContext,
11307 "PP: try START\n"); break;
11308 case XML_PARSER_MISC:
11309 xmlGenericError(xmlGenericErrorContext,
11310 "PP: try MISC\n");break;
11311 case XML_PARSER_COMMENT:
11312 xmlGenericError(xmlGenericErrorContext,
11313 "PP: try COMMENT\n");break;
11314 case XML_PARSER_PROLOG:
11315 xmlGenericError(xmlGenericErrorContext,
11316 "PP: try PROLOG\n");break;
11317 case XML_PARSER_START_TAG:
11318 xmlGenericError(xmlGenericErrorContext,
11319 "PP: try START_TAG\n");break;
11320 case XML_PARSER_CONTENT:
11321 xmlGenericError(xmlGenericErrorContext,
11322 "PP: try CONTENT\n");break;
11323 case XML_PARSER_CDATA_SECTION:
11324 xmlGenericError(xmlGenericErrorContext,
11325 "PP: try CDATA_SECTION\n");break;
11326 case XML_PARSER_END_TAG:
11327 xmlGenericError(xmlGenericErrorContext,
11328 "PP: try END_TAG\n");break;
11329 case XML_PARSER_ENTITY_DECL:
11330 xmlGenericError(xmlGenericErrorContext,
11331 "PP: try ENTITY_DECL\n");break;
11332 case XML_PARSER_ENTITY_VALUE:
11333 xmlGenericError(xmlGenericErrorContext,
11334 "PP: try ENTITY_VALUE\n");break;
11335 case XML_PARSER_ATTRIBUTE_VALUE:
11336 xmlGenericError(xmlGenericErrorContext,
11337 "PP: try ATTRIBUTE_VALUE\n");break;
11338 case XML_PARSER_DTD:
11339 xmlGenericError(xmlGenericErrorContext,
11340 "PP: try DTD\n");break;
11341 case XML_PARSER_EPILOG:
11342 xmlGenericError(xmlGenericErrorContext,
11343 "PP: try EPILOG\n");break;
11344 case XML_PARSER_PI:
11345 xmlGenericError(xmlGenericErrorContext,
11346 "PP: try PI\n");break;
11347 case XML_PARSER_IGNORE:
11348 xmlGenericError(xmlGenericErrorContext,
11349 "PP: try IGNORE\n");break;
11350 }
11351#endif
11352
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011353 if ((ctxt->input != NULL) &&
11354 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011355 xmlSHRINK(ctxt);
11356 ctxt->checkIndex = 0;
11357 }
11358 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011359
Daniel Veillarde50ba812013-04-11 15:54:51 +080011360 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011361 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011362 return(0);
11363
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011364
Owen Taylor3473f882001-02-23 17:55:21 +000011365 /*
11366 * Pop-up of finished entities.
11367 */
11368 while ((RAW == 0) && (ctxt->inputNr > 1))
11369 xmlPopInput(ctxt);
11370
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011371 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011372 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011373 avail = ctxt->input->length -
11374 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011375 else {
11376 /*
11377 * If we are operating on converted input, try to flush
11378 * remaining chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011379 * buffer. But do not do this in document start where
11380 * encoding="..." may not have been read and we work on a
11381 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011382 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011383 if ((ctxt->instate != XML_PARSER_START) &&
11384 (ctxt->input->buf->raw != NULL) &&
11385 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011386 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11387 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011388 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011389
11390 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011391 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11392 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011393 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011394 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011395 (ctxt->input->cur - ctxt->input->base);
11396 }
Owen Taylor3473f882001-02-23 17:55:21 +000011397 if (avail < 1)
11398 goto done;
11399 switch (ctxt->instate) {
11400 case XML_PARSER_EOF:
11401 /*
11402 * Document parsing is done !
11403 */
11404 goto done;
11405 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011406 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11407 xmlChar start[4];
11408 xmlCharEncoding enc;
11409
11410 /*
11411 * Very first chars read from the document flow.
11412 */
11413 if (avail < 4)
11414 goto done;
11415
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011416 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011417 * Get the 4 first bytes and decode the charset
11418 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011419 * plug some encoding conversion routines,
11420 * else xmlSwitchEncoding will set to (default)
11421 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011422 */
11423 start[0] = RAW;
11424 start[1] = NXT(1);
11425 start[2] = NXT(2);
11426 start[3] = NXT(3);
11427 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011428 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011429 break;
11430 }
Owen Taylor3473f882001-02-23 17:55:21 +000011431
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011432 if (avail < 2)
11433 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011434 cur = ctxt->input->cur[0];
11435 next = ctxt->input->cur[1];
11436 if (cur == 0) {
11437 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11438 ctxt->sax->setDocumentLocator(ctxt->userData,
11439 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011440 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011441 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011442#ifdef DEBUG_PUSH
11443 xmlGenericError(xmlGenericErrorContext,
11444 "PP: entering EOF\n");
11445#endif
11446 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11447 ctxt->sax->endDocument(ctxt->userData);
11448 goto done;
11449 }
11450 if ((cur == '<') && (next == '?')) {
11451 /* PI or XML decl */
11452 if (avail < 5) return(ret);
11453 if ((!terminate) &&
11454 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11455 return(ret);
11456 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11457 ctxt->sax->setDocumentLocator(ctxt->userData,
11458 &xmlDefaultSAXLocator);
11459 if ((ctxt->input->cur[2] == 'x') &&
11460 (ctxt->input->cur[3] == 'm') &&
11461 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011462 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011463 ret += 5;
11464#ifdef DEBUG_PUSH
11465 xmlGenericError(xmlGenericErrorContext,
11466 "PP: Parsing XML Decl\n");
11467#endif
11468 xmlParseXMLDecl(ctxt);
11469 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11470 /*
11471 * The XML REC instructs us to stop parsing right
11472 * here
11473 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011474 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011475 return(0);
11476 }
11477 ctxt->standalone = ctxt->input->standalone;
11478 if ((ctxt->encoding == NULL) &&
11479 (ctxt->input->encoding != NULL))
11480 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11481 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11482 (!ctxt->disableSAX))
11483 ctxt->sax->startDocument(ctxt->userData);
11484 ctxt->instate = XML_PARSER_MISC;
11485#ifdef DEBUG_PUSH
11486 xmlGenericError(xmlGenericErrorContext,
11487 "PP: entering MISC\n");
11488#endif
11489 } else {
11490 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11491 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11492 (!ctxt->disableSAX))
11493 ctxt->sax->startDocument(ctxt->userData);
11494 ctxt->instate = XML_PARSER_MISC;
11495#ifdef DEBUG_PUSH
11496 xmlGenericError(xmlGenericErrorContext,
11497 "PP: entering MISC\n");
11498#endif
11499 }
11500 } else {
11501 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11502 ctxt->sax->setDocumentLocator(ctxt->userData,
11503 &xmlDefaultSAXLocator);
11504 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011505 if (ctxt->version == NULL) {
11506 xmlErrMemory(ctxt, NULL);
11507 break;
11508 }
Owen Taylor3473f882001-02-23 17:55:21 +000011509 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11510 (!ctxt->disableSAX))
11511 ctxt->sax->startDocument(ctxt->userData);
11512 ctxt->instate = XML_PARSER_MISC;
11513#ifdef DEBUG_PUSH
11514 xmlGenericError(xmlGenericErrorContext,
11515 "PP: entering MISC\n");
11516#endif
11517 }
11518 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011519 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011520 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011521 const xmlChar *prefix = NULL;
11522 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011523 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011524
11525 if ((avail < 2) && (ctxt->inputNr == 1))
11526 goto done;
11527 cur = ctxt->input->cur[0];
11528 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011529 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011530 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011531 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11532 ctxt->sax->endDocument(ctxt->userData);
11533 goto done;
11534 }
11535 if (!terminate) {
11536 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011537 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011538 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011539 goto done;
11540 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11541 goto done;
11542 }
11543 }
11544 if (ctxt->spaceNr == 0)
11545 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011546 else if (*ctxt->space == -2)
11547 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011548 else
11549 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011550#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011551 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011552#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011553 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011554#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011555 else
11556 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011557#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011558 if (ctxt->instate == XML_PARSER_EOF)
11559 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011560 if (name == NULL) {
11561 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011562 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011563 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11564 ctxt->sax->endDocument(ctxt->userData);
11565 goto done;
11566 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011567#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011568 /*
11569 * [ VC: Root Element Type ]
11570 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011571 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011572 */
11573 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11574 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11575 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011576#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011577
11578 /*
11579 * Check for an Empty Element.
11580 */
11581 if ((RAW == '/') && (NXT(1) == '>')) {
11582 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011583
11584 if (ctxt->sax2) {
11585 if ((ctxt->sax != NULL) &&
11586 (ctxt->sax->endElementNs != NULL) &&
11587 (!ctxt->disableSAX))
11588 ctxt->sax->endElementNs(ctxt->userData, name,
11589 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011590 if (ctxt->nsNr - nsNr > 0)
11591 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011592#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011593 } else {
11594 if ((ctxt->sax != NULL) &&
11595 (ctxt->sax->endElement != NULL) &&
11596 (!ctxt->disableSAX))
11597 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011598#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011599 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011600 if (ctxt->instate == XML_PARSER_EOF)
11601 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011602 spacePop(ctxt);
11603 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011604 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011605 } else {
11606 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011607 }
Daniel Veillard65686452012-07-19 18:25:01 +080011608 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011609 break;
11610 }
11611 if (RAW == '>') {
11612 NEXT;
11613 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011614 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011615 "Couldn't find end of Start Tag %s\n",
11616 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011617 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011618 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011619 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011620 if (ctxt->sax2)
11621 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011622#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011623 else
11624 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011625#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011626
Daniel Veillarda880b122003-04-21 21:36:41 +000011627 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011628 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011629 break;
11630 }
11631 case XML_PARSER_CONTENT: {
11632 const xmlChar *test;
11633 unsigned int cons;
11634 if ((avail < 2) && (ctxt->inputNr == 1))
11635 goto done;
11636 cur = ctxt->input->cur[0];
11637 next = ctxt->input->cur[1];
11638
11639 test = CUR_PTR;
11640 cons = ctxt->input->consumed;
11641 if ((cur == '<') && (next == '/')) {
11642 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011643 break;
11644 } else if ((cur == '<') && (next == '?')) {
11645 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011646 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11647 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011648 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011649 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011650 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011651 ctxt->instate = XML_PARSER_CONTENT;
11652 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011653 } else if ((cur == '<') && (next != '!')) {
11654 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011655 break;
11656 } else if ((cur == '<') && (next == '!') &&
11657 (ctxt->input->cur[2] == '-') &&
11658 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011659 int term;
11660
11661 if (avail < 4)
11662 goto done;
11663 ctxt->input->cur += 4;
11664 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11665 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011666 if ((!terminate) && (term < 0)) {
11667 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011668 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011669 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011670 xmlParseComment(ctxt);
11671 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011672 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011673 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11674 (ctxt->input->cur[2] == '[') &&
11675 (ctxt->input->cur[3] == 'C') &&
11676 (ctxt->input->cur[4] == 'D') &&
11677 (ctxt->input->cur[5] == 'A') &&
11678 (ctxt->input->cur[6] == 'T') &&
11679 (ctxt->input->cur[7] == 'A') &&
11680 (ctxt->input->cur[8] == '[')) {
11681 SKIP(9);
11682 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011683 break;
11684 } else if ((cur == '<') && (next == '!') &&
11685 (avail < 9)) {
11686 goto done;
11687 } else if (cur == '&') {
11688 if ((!terminate) &&
11689 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11690 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011691 xmlParseReference(ctxt);
11692 } else {
11693 /* TODO Avoid the extra copy, handle directly !!! */
11694 /*
11695 * Goal of the following test is:
11696 * - minimize calls to the SAX 'character' callback
11697 * when they are mergeable
11698 * - handle a problem for isBlank when we only parse
11699 * a sequence of blank chars and the next one is
11700 * not available to check against '<' presence.
11701 * - tries to homogenize the differences in SAX
11702 * callbacks between the push and pull versions
11703 * of the parser.
11704 */
11705 if ((ctxt->inputNr == 1) &&
11706 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11707 if (!terminate) {
11708 if (ctxt->progressive) {
11709 if ((lastlt == NULL) ||
11710 (ctxt->input->cur > lastlt))
11711 goto done;
11712 } else if (xmlParseLookupSequence(ctxt,
11713 '<', 0, 0) < 0) {
11714 goto done;
11715 }
11716 }
11717 }
11718 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011719 xmlParseCharData(ctxt, 0);
11720 }
11721 /*
11722 * Pop-up of finished entities.
11723 */
11724 while ((RAW == 0) && (ctxt->inputNr > 1))
11725 xmlPopInput(ctxt);
11726 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011727 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11728 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011729 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011730 break;
11731 }
11732 break;
11733 }
11734 case XML_PARSER_END_TAG:
11735 if (avail < 2)
11736 goto done;
11737 if (!terminate) {
11738 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011739 /* > can be found unescaped in attribute values */
11740 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011741 goto done;
11742 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11743 goto done;
11744 }
11745 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011746 if (ctxt->sax2) {
11747 xmlParseEndTag2(ctxt,
11748 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11749 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011750 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011751 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011752 }
11753#ifdef LIBXML_SAX1_ENABLED
11754 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011755 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011756#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011757 if (ctxt->instate == XML_PARSER_EOF) {
11758 /* Nothing */
11759 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011760 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011761 } else {
11762 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011763 }
11764 break;
11765 case XML_PARSER_CDATA_SECTION: {
11766 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011767 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011768 * cdataBlock merge back contiguous callbacks.
11769 */
11770 int base;
11771
11772 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11773 if (base < 0) {
11774 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011775 int tmp;
11776
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011777 tmp = xmlCheckCdataPush(ctxt->input->cur,
David Kilzer4f8606c2016-01-05 13:38:09 -080011778 XML_PARSER_BIG_BUFFER_SIZE, 0);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011779 if (tmp < 0) {
11780 tmp = -tmp;
11781 ctxt->input->cur += tmp;
11782 goto encoding_error;
11783 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011784 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11785 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011786 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011787 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011788 else if (ctxt->sax->characters != NULL)
11789 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011790 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011791 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011792 if (ctxt->instate == XML_PARSER_EOF)
11793 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011794 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011795 ctxt->checkIndex = 0;
11796 }
11797 goto done;
11798 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011799 int tmp;
11800
David Kilzer4f8606c2016-01-05 13:38:09 -080011801 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011802 if ((tmp < 0) || (tmp != base)) {
11803 tmp = -tmp;
11804 ctxt->input->cur += tmp;
11805 goto encoding_error;
11806 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011807 if ((ctxt->sax != NULL) && (base == 0) &&
11808 (ctxt->sax->cdataBlock != NULL) &&
11809 (!ctxt->disableSAX)) {
11810 /*
11811 * Special case to provide identical behaviour
11812 * between pull and push parsers on empty CDATA
11813 * sections
11814 */
11815 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11816 (!strncmp((const char *)&ctxt->input->cur[-9],
11817 "<![CDATA[", 9)))
11818 ctxt->sax->cdataBlock(ctxt->userData,
11819 BAD_CAST "", 0);
11820 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011821 (!ctxt->disableSAX)) {
11822 if (ctxt->sax->cdataBlock != NULL)
11823 ctxt->sax->cdataBlock(ctxt->userData,
11824 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011825 else if (ctxt->sax->characters != NULL)
11826 ctxt->sax->characters(ctxt->userData,
11827 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011828 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011829 if (ctxt->instate == XML_PARSER_EOF)
11830 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011831 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011832 ctxt->checkIndex = 0;
11833 ctxt->instate = XML_PARSER_CONTENT;
11834#ifdef DEBUG_PUSH
11835 xmlGenericError(xmlGenericErrorContext,
11836 "PP: entering CONTENT\n");
11837#endif
11838 }
11839 break;
11840 }
Owen Taylor3473f882001-02-23 17:55:21 +000011841 case XML_PARSER_MISC:
11842 SKIP_BLANKS;
11843 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011844 avail = ctxt->input->length -
11845 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011846 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011847 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011848 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011849 if (avail < 2)
11850 goto done;
11851 cur = ctxt->input->cur[0];
11852 next = ctxt->input->cur[1];
11853 if ((cur == '<') && (next == '?')) {
11854 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011855 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11856 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011857 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011858 }
Owen Taylor3473f882001-02-23 17:55:21 +000011859#ifdef DEBUG_PUSH
11860 xmlGenericError(xmlGenericErrorContext,
11861 "PP: Parsing PI\n");
11862#endif
11863 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011864 if (ctxt->instate == XML_PARSER_EOF)
11865 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011866 ctxt->instate = XML_PARSER_MISC;
11867 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011868 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011869 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011870 (ctxt->input->cur[2] == '-') &&
11871 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011872 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011873 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11874 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011875 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011876 }
Owen Taylor3473f882001-02-23 17:55:21 +000011877#ifdef DEBUG_PUSH
11878 xmlGenericError(xmlGenericErrorContext,
11879 "PP: Parsing Comment\n");
11880#endif
11881 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011882 if (ctxt->instate == XML_PARSER_EOF)
11883 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011884 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011885 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011886 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011887 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011888 (ctxt->input->cur[2] == 'D') &&
11889 (ctxt->input->cur[3] == 'O') &&
11890 (ctxt->input->cur[4] == 'C') &&
11891 (ctxt->input->cur[5] == 'T') &&
11892 (ctxt->input->cur[6] == 'Y') &&
11893 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011894 (ctxt->input->cur[8] == 'E')) {
11895 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011896 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11897 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011898 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011899 }
Owen Taylor3473f882001-02-23 17:55:21 +000011900#ifdef DEBUG_PUSH
11901 xmlGenericError(xmlGenericErrorContext,
11902 "PP: Parsing internal subset\n");
11903#endif
11904 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011905 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011906 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011907 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011908 if (ctxt->instate == XML_PARSER_EOF)
11909 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011910 if (RAW == '[') {
11911 ctxt->instate = XML_PARSER_DTD;
11912#ifdef DEBUG_PUSH
11913 xmlGenericError(xmlGenericErrorContext,
11914 "PP: entering DTD\n");
11915#endif
11916 } else {
11917 /*
11918 * Create and update the external subset.
11919 */
11920 ctxt->inSubset = 2;
11921 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11922 (ctxt->sax->externalSubset != NULL))
11923 ctxt->sax->externalSubset(ctxt->userData,
11924 ctxt->intSubName, ctxt->extSubSystem,
11925 ctxt->extSubURI);
11926 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011927 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011928 ctxt->instate = XML_PARSER_PROLOG;
11929#ifdef DEBUG_PUSH
11930 xmlGenericError(xmlGenericErrorContext,
11931 "PP: entering PROLOG\n");
11932#endif
11933 }
11934 } else if ((cur == '<') && (next == '!') &&
11935 (avail < 9)) {
11936 goto done;
11937 } else {
11938 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011939 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011940 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011941#ifdef DEBUG_PUSH
11942 xmlGenericError(xmlGenericErrorContext,
11943 "PP: entering START_TAG\n");
11944#endif
11945 }
11946 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011947 case XML_PARSER_PROLOG:
11948 SKIP_BLANKS;
11949 if (ctxt->input->buf == NULL)
11950 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11951 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011952 avail = xmlBufUse(ctxt->input->buf->buffer) -
11953 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011954 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011955 goto done;
11956 cur = ctxt->input->cur[0];
11957 next = ctxt->input->cur[1];
11958 if ((cur == '<') && (next == '?')) {
11959 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011960 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11961 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011962 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011963 }
Owen Taylor3473f882001-02-23 17:55:21 +000011964#ifdef DEBUG_PUSH
11965 xmlGenericError(xmlGenericErrorContext,
11966 "PP: Parsing PI\n");
11967#endif
11968 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011969 if (ctxt->instate == XML_PARSER_EOF)
11970 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011971 ctxt->instate = XML_PARSER_PROLOG;
11972 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011973 } else if ((cur == '<') && (next == '!') &&
11974 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11975 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011976 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11977 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011978 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011979 }
Owen Taylor3473f882001-02-23 17:55:21 +000011980#ifdef DEBUG_PUSH
11981 xmlGenericError(xmlGenericErrorContext,
11982 "PP: Parsing Comment\n");
11983#endif
11984 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011985 if (ctxt->instate == XML_PARSER_EOF)
11986 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011987 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011988 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011989 } else if ((cur == '<') && (next == '!') &&
11990 (avail < 4)) {
11991 goto done;
11992 } else {
11993 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011994 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011995 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011996 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011997#ifdef DEBUG_PUSH
11998 xmlGenericError(xmlGenericErrorContext,
11999 "PP: entering START_TAG\n");
12000#endif
12001 }
12002 break;
12003 case XML_PARSER_EPILOG:
12004 SKIP_BLANKS;
12005 if (ctxt->input->buf == NULL)
12006 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
12007 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012008 avail = xmlBufUse(ctxt->input->buf->buffer) -
12009 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000012010 if (avail < 2)
12011 goto done;
12012 cur = ctxt->input->cur[0];
12013 next = ctxt->input->cur[1];
12014 if ((cur == '<') && (next == '?')) {
12015 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080012016 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12017 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000012018 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080012019 }
Owen Taylor3473f882001-02-23 17:55:21 +000012020#ifdef DEBUG_PUSH
12021 xmlGenericError(xmlGenericErrorContext,
12022 "PP: Parsing PI\n");
12023#endif
12024 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012025 if (ctxt->instate == XML_PARSER_EOF)
12026 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012027 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080012028 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012029 } else if ((cur == '<') && (next == '!') &&
12030 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12031 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080012032 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12033 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012034 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080012035 }
Owen Taylor3473f882001-02-23 17:55:21 +000012036#ifdef DEBUG_PUSH
12037 xmlGenericError(xmlGenericErrorContext,
12038 "PP: Parsing Comment\n");
12039#endif
12040 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012041 if (ctxt->instate == XML_PARSER_EOF)
12042 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012043 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080012044 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012045 } else if ((cur == '<') && (next == '!') &&
12046 (avail < 4)) {
12047 goto done;
12048 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012049 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080012050 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012051#ifdef DEBUG_PUSH
12052 xmlGenericError(xmlGenericErrorContext,
12053 "PP: entering EOF\n");
12054#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012055 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012056 ctxt->sax->endDocument(ctxt->userData);
12057 goto done;
12058 }
12059 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012060 case XML_PARSER_DTD: {
12061 /*
12062 * Sorry but progressive parsing of the internal subset
12063 * is not expected to be supported. We first check that
12064 * the full content of the internal subset is available and
12065 * the parsing is launched only at that point.
12066 * Internal subset ends up with "']' S? '>'" in an unescaped
12067 * section and not in a ']]>' sequence which are conditional
12068 * sections (whoever argued to keep that crap in XML deserve
12069 * a place in hell !).
12070 */
12071 int base, i;
12072 xmlChar *buf;
12073 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012074 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000012075
12076 base = ctxt->input->cur - ctxt->input->base;
12077 if (base < 0) return(0);
12078 if (ctxt->checkIndex > base)
12079 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012080 buf = xmlBufContent(ctxt->input->buf->buffer);
12081 use = xmlBufUse(ctxt->input->buf->buffer);
12082 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000012083 if (quote != 0) {
12084 if (buf[base] == quote)
12085 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012086 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000012087 }
Daniel Veillard036143b2004-02-12 11:57:52 +000012088 if ((quote == 0) && (buf[base] == '<')) {
12089 int found = 0;
12090 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012091 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000012092 (buf[base + 1] == '!') &&
12093 (buf[base + 2] == '-') &&
12094 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012095 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000012096 if ((buf[base] == '-') &&
12097 (buf[base + 1] == '-') &&
12098 (buf[base + 2] == '>')) {
12099 found = 1;
12100 base += 2;
12101 break;
12102 }
12103 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012104 if (!found) {
12105#if 0
12106 fprintf(stderr, "unfinished comment\n");
12107#endif
12108 break; /* for */
12109 }
Daniel Veillard036143b2004-02-12 11:57:52 +000012110 continue;
12111 }
12112 }
Owen Taylor3473f882001-02-23 17:55:21 +000012113 if (buf[base] == '"') {
12114 quote = '"';
12115 continue;
12116 }
12117 if (buf[base] == '\'') {
12118 quote = '\'';
12119 continue;
12120 }
12121 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012122#if 0
12123 fprintf(stderr, "%c%c%c%c: ", buf[base],
12124 buf[base + 1], buf[base + 2], buf[base + 3]);
12125#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012126 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000012127 break;
12128 if (buf[base + 1] == ']') {
12129 /* conditional crap, skip both ']' ! */
12130 base++;
12131 continue;
12132 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012133 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012134 if (buf[base + i] == '>') {
12135#if 0
12136 fprintf(stderr, "found\n");
12137#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012138 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012139 }
12140 if (!IS_BLANK_CH(buf[base + i])) {
12141#if 0
12142 fprintf(stderr, "not found\n");
12143#endif
12144 goto not_end_of_int_subset;
12145 }
Owen Taylor3473f882001-02-23 17:55:21 +000012146 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012147#if 0
12148 fprintf(stderr, "end of stream\n");
12149#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012150 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012151
Owen Taylor3473f882001-02-23 17:55:21 +000012152 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012153not_end_of_int_subset:
12154 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000012155 }
12156 /*
12157 * We didn't found the end of the Internal subset
12158 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012159 if (quote == 0)
12160 ctxt->checkIndex = base;
12161 else
12162 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012163#ifdef DEBUG_PUSH
12164 if (next == 0)
12165 xmlGenericError(xmlGenericErrorContext,
12166 "PP: lookup of int subset end filed\n");
12167#endif
12168 goto done;
12169
12170found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012171 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012172 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012173 if (ctxt->instate == XML_PARSER_EOF)
12174 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012175 ctxt->inSubset = 2;
12176 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12177 (ctxt->sax->externalSubset != NULL))
12178 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12179 ctxt->extSubSystem, ctxt->extSubURI);
12180 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000012181 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012182 if (ctxt->instate == XML_PARSER_EOF)
12183 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012184 ctxt->instate = XML_PARSER_PROLOG;
12185 ctxt->checkIndex = 0;
12186#ifdef DEBUG_PUSH
12187 xmlGenericError(xmlGenericErrorContext,
12188 "PP: entering PROLOG\n");
12189#endif
12190 break;
12191 }
12192 case XML_PARSER_COMMENT:
12193 xmlGenericError(xmlGenericErrorContext,
12194 "PP: internal error, state == COMMENT\n");
12195 ctxt->instate = XML_PARSER_CONTENT;
12196#ifdef DEBUG_PUSH
12197 xmlGenericError(xmlGenericErrorContext,
12198 "PP: entering CONTENT\n");
12199#endif
12200 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012201 case XML_PARSER_IGNORE:
12202 xmlGenericError(xmlGenericErrorContext,
12203 "PP: internal error, state == IGNORE");
12204 ctxt->instate = XML_PARSER_DTD;
12205#ifdef DEBUG_PUSH
12206 xmlGenericError(xmlGenericErrorContext,
12207 "PP: entering DTD\n");
12208#endif
12209 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012210 case XML_PARSER_PI:
12211 xmlGenericError(xmlGenericErrorContext,
12212 "PP: internal error, state == PI\n");
12213 ctxt->instate = XML_PARSER_CONTENT;
12214#ifdef DEBUG_PUSH
12215 xmlGenericError(xmlGenericErrorContext,
12216 "PP: entering CONTENT\n");
12217#endif
12218 break;
12219 case XML_PARSER_ENTITY_DECL:
12220 xmlGenericError(xmlGenericErrorContext,
12221 "PP: internal error, state == ENTITY_DECL\n");
12222 ctxt->instate = XML_PARSER_DTD;
12223#ifdef DEBUG_PUSH
12224 xmlGenericError(xmlGenericErrorContext,
12225 "PP: entering DTD\n");
12226#endif
12227 break;
12228 case XML_PARSER_ENTITY_VALUE:
12229 xmlGenericError(xmlGenericErrorContext,
12230 "PP: internal error, state == ENTITY_VALUE\n");
12231 ctxt->instate = XML_PARSER_CONTENT;
12232#ifdef DEBUG_PUSH
12233 xmlGenericError(xmlGenericErrorContext,
12234 "PP: entering DTD\n");
12235#endif
12236 break;
12237 case XML_PARSER_ATTRIBUTE_VALUE:
12238 xmlGenericError(xmlGenericErrorContext,
12239 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12240 ctxt->instate = XML_PARSER_START_TAG;
12241#ifdef DEBUG_PUSH
12242 xmlGenericError(xmlGenericErrorContext,
12243 "PP: entering START_TAG\n");
12244#endif
12245 break;
12246 case XML_PARSER_SYSTEM_LITERAL:
12247 xmlGenericError(xmlGenericErrorContext,
12248 "PP: internal error, state == SYSTEM_LITERAL\n");
12249 ctxt->instate = XML_PARSER_START_TAG;
12250#ifdef DEBUG_PUSH
12251 xmlGenericError(xmlGenericErrorContext,
12252 "PP: entering START_TAG\n");
12253#endif
12254 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012255 case XML_PARSER_PUBLIC_LITERAL:
12256 xmlGenericError(xmlGenericErrorContext,
12257 "PP: internal error, state == PUBLIC_LITERAL\n");
12258 ctxt->instate = XML_PARSER_START_TAG;
12259#ifdef DEBUG_PUSH
12260 xmlGenericError(xmlGenericErrorContext,
12261 "PP: entering START_TAG\n");
12262#endif
12263 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012264 }
12265 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012266done:
Owen Taylor3473f882001-02-23 17:55:21 +000012267#ifdef DEBUG_PUSH
12268 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12269#endif
12270 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012271encoding_error:
12272 {
12273 char buffer[150];
12274
12275 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12276 ctxt->input->cur[0], ctxt->input->cur[1],
12277 ctxt->input->cur[2], ctxt->input->cur[3]);
12278 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12279 "Input is not proper UTF-8, indicate encoding !\n%s",
12280 BAD_CAST buffer, NULL);
12281 }
12282 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012283}
12284
12285/**
Daniel Veillard65686452012-07-19 18:25:01 +080012286 * xmlParseCheckTransition:
12287 * @ctxt: an XML parser context
12288 * @chunk: a char array
12289 * @size: the size in byte of the chunk
12290 *
12291 * Check depending on the current parser state if the chunk given must be
12292 * processed immediately or one need more data to advance on parsing.
12293 *
12294 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12295 */
12296static int
12297xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12298 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12299 return(-1);
12300 if (ctxt->instate == XML_PARSER_START_TAG) {
12301 if (memchr(chunk, '>', size) != NULL)
12302 return(1);
12303 return(0);
12304 }
12305 if (ctxt->progressive == XML_PARSER_COMMENT) {
12306 if (memchr(chunk, '>', size) != NULL)
12307 return(1);
12308 return(0);
12309 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012310 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12311 if (memchr(chunk, '>', size) != NULL)
12312 return(1);
12313 return(0);
12314 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012315 if (ctxt->progressive == XML_PARSER_PI) {
12316 if (memchr(chunk, '>', size) != NULL)
12317 return(1);
12318 return(0);
12319 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012320 if (ctxt->instate == XML_PARSER_END_TAG) {
12321 if (memchr(chunk, '>', size) != NULL)
12322 return(1);
12323 return(0);
12324 }
12325 if ((ctxt->progressive == XML_PARSER_DTD) ||
12326 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012327 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012328 return(1);
12329 return(0);
12330 }
Daniel Veillard65686452012-07-19 18:25:01 +080012331 return(1);
12332}
12333
12334/**
Owen Taylor3473f882001-02-23 17:55:21 +000012335 * xmlParseChunk:
12336 * @ctxt: an XML parser context
12337 * @chunk: an char array
12338 * @size: the size in byte of the chunk
12339 * @terminate: last chunk indicator
12340 *
12341 * Parse a Chunk of memory
12342 *
12343 * Returns zero if no error, the xmlParserErrors otherwise.
12344 */
12345int
12346xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12347 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012348 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012349 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012350 size_t old_avail = 0;
12351 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012352
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012353 if (ctxt == NULL)
12354 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012355 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012356 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012357 if (ctxt->instate == XML_PARSER_EOF)
12358 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012359 if (ctxt->instate == XML_PARSER_START)
12360 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012361 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12362 (chunk[size - 1] == '\r')) {
12363 end_in_lf = 1;
12364 size--;
12365 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012366
12367xmldecl_done:
12368
Owen Taylor3473f882001-02-23 17:55:21 +000012369 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12370 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012371 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12372 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012373 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012374
Daniel Veillard65686452012-07-19 18:25:01 +080012375 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012376 /*
12377 * Specific handling if we autodetected an encoding, we should not
12378 * push more than the first line ... which depend on the encoding
12379 * And only push the rest once the final encoding was detected
12380 */
12381 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12382 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012383 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012384
12385 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12386 BAD_CAST "UTF-16")) ||
12387 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12388 BAD_CAST "UTF16")))
12389 len = 90;
12390 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12391 BAD_CAST "UCS-4")) ||
12392 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12393 BAD_CAST "UCS4")))
12394 len = 180;
12395
12396 if (ctxt->input->buf->rawconsumed < len)
12397 len -= ctxt->input->buf->rawconsumed;
12398
Raul Hudeaba9716a2010-03-15 10:13:29 +010012399 /*
12400 * Change size for reading the initial declaration only
12401 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12402 * will blindly copy extra bytes from memory.
12403 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012404 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012405 remain = size - len;
12406 size = len;
12407 } else {
12408 remain = 0;
12409 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012410 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012411 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012412 if (res < 0) {
12413 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012414 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012415 return (XML_PARSER_EOF);
12416 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012417 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012418#ifdef DEBUG_PUSH
12419 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12420#endif
12421
Owen Taylor3473f882001-02-23 17:55:21 +000012422 } else if (ctxt->instate != XML_PARSER_EOF) {
12423 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12424 xmlParserInputBufferPtr in = ctxt->input->buf;
12425 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12426 (in->raw != NULL)) {
12427 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012428 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12429 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012430
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012431 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012432 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012433 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012434 xmlGenericError(xmlGenericErrorContext,
12435 "xmlParseChunk: encoder error\n");
12436 return(XML_ERR_INVALID_ENCODING);
12437 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012438 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012439 }
12440 }
12441 }
Daniel Veillard65686452012-07-19 18:25:01 +080012442 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012443 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012444 } else {
12445 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12446 avail = xmlBufUse(ctxt->input->buf->buffer);
12447 /*
12448 * Depending on the current state it may not be such
12449 * a good idea to try parsing if there is nothing in the chunk
12450 * which would be worth doing a parser state transition and we
12451 * need to wait for more data
12452 */
12453 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12454 (old_avail == 0) || (avail == 0) ||
12455 (xmlParseCheckTransition(ctxt,
12456 (const char *)&ctxt->input->base[old_avail],
12457 avail - old_avail)))
12458 xmlParseTryOrFinish(ctxt, terminate);
12459 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012460 if (ctxt->instate == XML_PARSER_EOF)
12461 return(ctxt->errNo);
12462
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012463 if ((ctxt->input != NULL) &&
12464 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12465 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12466 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12467 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012468 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012469 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012470 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12471 return(ctxt->errNo);
12472
12473 if (remain != 0) {
12474 chunk += size;
12475 size = remain;
12476 remain = 0;
12477 goto xmldecl_done;
12478 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012479 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12480 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012481 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12482 ctxt->input);
12483 size_t current = ctxt->input->cur - ctxt->input->base;
12484
Daniel Veillarda617e242006-01-09 14:38:44 +000012485 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012486
12487 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12488 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012489 }
Owen Taylor3473f882001-02-23 17:55:21 +000012490 if (terminate) {
12491 /*
12492 * Check for termination
12493 */
Daniel Veillard65686452012-07-19 18:25:01 +080012494 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012495
12496 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012497 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012498 cur_avail = ctxt->input->length -
12499 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012500 else
Daniel Veillard65686452012-07-19 18:25:01 +080012501 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12502 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012503 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012504
Owen Taylor3473f882001-02-23 17:55:21 +000012505 if ((ctxt->instate != XML_PARSER_EOF) &&
12506 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012507 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012508 }
Daniel Veillard65686452012-07-19 18:25:01 +080012509 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012510 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012511 }
Owen Taylor3473f882001-02-23 17:55:21 +000012512 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012513 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012514 ctxt->sax->endDocument(ctxt->userData);
12515 }
12516 ctxt->instate = XML_PARSER_EOF;
12517 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012518 if (ctxt->wellFormed == 0)
12519 return((xmlParserErrors) ctxt->errNo);
12520 else
12521 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012522}
12523
12524/************************************************************************
12525 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012526 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012527 * *
12528 ************************************************************************/
12529
12530/**
Owen Taylor3473f882001-02-23 17:55:21 +000012531 * xmlCreatePushParserCtxt:
12532 * @sax: a SAX handler
12533 * @user_data: The user data returned on SAX callbacks
12534 * @chunk: a pointer to an array of chars
12535 * @size: number of chars in the array
12536 * @filename: an optional file name or URI
12537 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012538 * Create a parser context for using the XML parser in push mode.
12539 * If @buffer and @size are non-NULL, the data is used to detect
12540 * the encoding. The remaining characters will be parsed so they
12541 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012542 * To allow content encoding detection, @size should be >= 4
12543 * The value of @filename is used for fetching external entities
12544 * and error/warning reports.
12545 *
12546 * Returns the new parser context or NULL
12547 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012548
Owen Taylor3473f882001-02-23 17:55:21 +000012549xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012550xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012551 const char *chunk, int size, const char *filename) {
12552 xmlParserCtxtPtr ctxt;
12553 xmlParserInputPtr inputStream;
12554 xmlParserInputBufferPtr buf;
12555 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12556
12557 /*
12558 * plug some encoding conversion routines
12559 */
12560 if ((chunk != NULL) && (size >= 4))
12561 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12562
12563 buf = xmlAllocParserInputBuffer(enc);
12564 if (buf == NULL) return(NULL);
12565
12566 ctxt = xmlNewParserCtxt();
12567 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012568 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012569 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012570 return(NULL);
12571 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012572 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012573 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12574 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012575 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012576 xmlFreeParserInputBuffer(buf);
12577 xmlFreeParserCtxt(ctxt);
12578 return(NULL);
12579 }
Owen Taylor3473f882001-02-23 17:55:21 +000012580 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012581#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012582 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012583#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012584 xmlFree(ctxt->sax);
12585 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12586 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012587 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012588 xmlFreeParserInputBuffer(buf);
12589 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012590 return(NULL);
12591 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012592 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12593 if (sax->initialized == XML_SAX2_MAGIC)
12594 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12595 else
12596 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012597 if (user_data != NULL)
12598 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012599 }
Owen Taylor3473f882001-02-23 17:55:21 +000012600 if (filename == NULL) {
12601 ctxt->directory = NULL;
12602 } else {
12603 ctxt->directory = xmlParserGetDirectory(filename);
12604 }
12605
12606 inputStream = xmlNewInputStream(ctxt);
12607 if (inputStream == NULL) {
12608 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012609 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012610 return(NULL);
12611 }
12612
12613 if (filename == NULL)
12614 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012615 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012616 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012617 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012618 if (inputStream->filename == NULL) {
12619 xmlFreeParserCtxt(ctxt);
12620 xmlFreeParserInputBuffer(buf);
12621 return(NULL);
12622 }
12623 }
Owen Taylor3473f882001-02-23 17:55:21 +000012624 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012625 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012626 inputPush(ctxt, inputStream);
12627
William M. Brack3a1cd212005-02-11 14:35:54 +000012628 /*
12629 * If the caller didn't provide an initial 'chunk' for determining
12630 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12631 * that it can be automatically determined later
12632 */
12633 if ((size == 0) || (chunk == NULL)) {
12634 ctxt->charset = XML_CHAR_ENCODING_NONE;
12635 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012636 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12637 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012638
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012639 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012640
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012641 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012642#ifdef DEBUG_PUSH
12643 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12644#endif
12645 }
12646
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012647 if (enc != XML_CHAR_ENCODING_NONE) {
12648 xmlSwitchEncoding(ctxt, enc);
12649 }
12650
Owen Taylor3473f882001-02-23 17:55:21 +000012651 return(ctxt);
12652}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012653#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012654
12655/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012656 * xmlHaltParser:
12657 * @ctxt: an XML parser context
12658 *
12659 * Blocks further parser processing don't override error
12660 * for internal use
12661 */
12662static void
12663xmlHaltParser(xmlParserCtxtPtr ctxt) {
12664 if (ctxt == NULL)
12665 return;
12666 ctxt->instate = XML_PARSER_EOF;
12667 ctxt->disableSAX = 1;
12668 if (ctxt->input != NULL) {
12669 /*
12670 * in case there was a specific allocation deallocate before
12671 * overriding base
12672 */
12673 if (ctxt->input->free != NULL) {
12674 ctxt->input->free((xmlChar *) ctxt->input->base);
12675 ctxt->input->free = NULL;
12676 }
12677 ctxt->input->cur = BAD_CAST"";
12678 ctxt->input->base = ctxt->input->cur;
12679 }
12680}
12681
12682/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012683 * xmlStopParser:
12684 * @ctxt: an XML parser context
12685 *
12686 * Blocks further parser processing
12687 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012688void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012689xmlStopParser(xmlParserCtxtPtr ctxt) {
12690 if (ctxt == NULL)
12691 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012692 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012693 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012694}
12695
12696/**
Owen Taylor3473f882001-02-23 17:55:21 +000012697 * xmlCreateIOParserCtxt:
12698 * @sax: a SAX handler
12699 * @user_data: The user data returned on SAX callbacks
12700 * @ioread: an I/O read function
12701 * @ioclose: an I/O close function
12702 * @ioctx: an I/O handler
12703 * @enc: the charset encoding if known
12704 *
12705 * Create a parser context for using the XML parser with an existing
12706 * I/O stream
12707 *
12708 * Returns the new parser context or NULL
12709 */
12710xmlParserCtxtPtr
12711xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12712 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12713 void *ioctx, xmlCharEncoding enc) {
12714 xmlParserCtxtPtr ctxt;
12715 xmlParserInputPtr inputStream;
12716 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012717
Daniel Veillard42595322004-11-08 10:52:06 +000012718 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012719
12720 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012721 if (buf == NULL) {
12722 if (ioclose != NULL)
12723 ioclose(ioctx);
12724 return (NULL);
12725 }
Owen Taylor3473f882001-02-23 17:55:21 +000012726
12727 ctxt = xmlNewParserCtxt();
12728 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012729 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012730 return(NULL);
12731 }
12732 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012733#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012734 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012735#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012736 xmlFree(ctxt->sax);
12737 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12738 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012739 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012740 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012741 return(NULL);
12742 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012743 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12744 if (sax->initialized == XML_SAX2_MAGIC)
12745 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12746 else
12747 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012748 if (user_data != NULL)
12749 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012750 }
Owen Taylor3473f882001-02-23 17:55:21 +000012751
12752 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12753 if (inputStream == NULL) {
12754 xmlFreeParserCtxt(ctxt);
12755 return(NULL);
12756 }
12757 inputPush(ctxt, inputStream);
12758
12759 return(ctxt);
12760}
12761
Daniel Veillard4432df22003-09-28 18:58:27 +000012762#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012763/************************************************************************
12764 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012765 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012766 * *
12767 ************************************************************************/
12768
12769/**
12770 * xmlIOParseDTD:
12771 * @sax: the SAX handler block or NULL
12772 * @input: an Input Buffer
12773 * @enc: the charset encoding if known
12774 *
12775 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012776 *
Owen Taylor3473f882001-02-23 17:55:21 +000012777 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012778 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012779 */
12780
12781xmlDtdPtr
12782xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12783 xmlCharEncoding enc) {
12784 xmlDtdPtr ret = NULL;
12785 xmlParserCtxtPtr ctxt;
12786 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012787 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012788
12789 if (input == NULL)
12790 return(NULL);
12791
12792 ctxt = xmlNewParserCtxt();
12793 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012794 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012795 return(NULL);
12796 }
12797
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012798 /* We are loading a DTD */
12799 ctxt->options |= XML_PARSE_DTDLOAD;
12800
Owen Taylor3473f882001-02-23 17:55:21 +000012801 /*
12802 * Set-up the SAX context
12803 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012804 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012805 if (ctxt->sax != NULL)
12806 xmlFree(ctxt->sax);
12807 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012808 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012809 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012810 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012811
12812 /*
12813 * generate a parser input from the I/O handler
12814 */
12815
Daniel Veillard43caefb2003-12-07 19:32:22 +000012816 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012817 if (pinput == NULL) {
12818 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012819 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012820 xmlFreeParserCtxt(ctxt);
12821 return(NULL);
12822 }
12823
12824 /*
12825 * plug some encoding conversion routines here.
12826 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012827 if (xmlPushInput(ctxt, pinput) < 0) {
12828 if (sax != NULL) ctxt->sax = NULL;
12829 xmlFreeParserCtxt(ctxt);
12830 return(NULL);
12831 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012832 if (enc != XML_CHAR_ENCODING_NONE) {
12833 xmlSwitchEncoding(ctxt, enc);
12834 }
Owen Taylor3473f882001-02-23 17:55:21 +000012835
12836 pinput->filename = NULL;
12837 pinput->line = 1;
12838 pinput->col = 1;
12839 pinput->base = ctxt->input->cur;
12840 pinput->cur = ctxt->input->cur;
12841 pinput->free = NULL;
12842
12843 /*
12844 * let's parse that entity knowing it's an external subset.
12845 */
12846 ctxt->inSubset = 2;
12847 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012848 if (ctxt->myDoc == NULL) {
12849 xmlErrMemory(ctxt, "New Doc failed");
12850 return(NULL);
12851 }
12852 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012853 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12854 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012855
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012856 if ((enc == XML_CHAR_ENCODING_NONE) &&
12857 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012858 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012859 * Get the 4 first bytes and decode the charset
12860 * if enc != XML_CHAR_ENCODING_NONE
12861 * plug some encoding conversion routines.
12862 */
12863 start[0] = RAW;
12864 start[1] = NXT(1);
12865 start[2] = NXT(2);
12866 start[3] = NXT(3);
12867 enc = xmlDetectCharEncoding(start, 4);
12868 if (enc != XML_CHAR_ENCODING_NONE) {
12869 xmlSwitchEncoding(ctxt, enc);
12870 }
12871 }
12872
Owen Taylor3473f882001-02-23 17:55:21 +000012873 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12874
12875 if (ctxt->myDoc != NULL) {
12876 if (ctxt->wellFormed) {
12877 ret = ctxt->myDoc->extSubset;
12878 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012879 if (ret != NULL) {
12880 xmlNodePtr tmp;
12881
12882 ret->doc = NULL;
12883 tmp = ret->children;
12884 while (tmp != NULL) {
12885 tmp->doc = NULL;
12886 tmp = tmp->next;
12887 }
12888 }
Owen Taylor3473f882001-02-23 17:55:21 +000012889 } else {
12890 ret = NULL;
12891 }
12892 xmlFreeDoc(ctxt->myDoc);
12893 ctxt->myDoc = NULL;
12894 }
12895 if (sax != NULL) ctxt->sax = NULL;
12896 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012897
Owen Taylor3473f882001-02-23 17:55:21 +000012898 return(ret);
12899}
12900
12901/**
12902 * xmlSAXParseDTD:
12903 * @sax: the SAX handler block
12904 * @ExternalID: a NAME* containing the External ID of the DTD
12905 * @SystemID: a NAME* containing the URL to the DTD
12906 *
12907 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012908 *
Owen Taylor3473f882001-02-23 17:55:21 +000012909 * Returns the resulting xmlDtdPtr or NULL in case of error.
12910 */
12911
12912xmlDtdPtr
12913xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12914 const xmlChar *SystemID) {
12915 xmlDtdPtr ret = NULL;
12916 xmlParserCtxtPtr ctxt;
12917 xmlParserInputPtr input = NULL;
12918 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012919 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012920
12921 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12922
12923 ctxt = xmlNewParserCtxt();
12924 if (ctxt == NULL) {
12925 return(NULL);
12926 }
12927
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012928 /* We are loading a DTD */
12929 ctxt->options |= XML_PARSE_DTDLOAD;
12930
Owen Taylor3473f882001-02-23 17:55:21 +000012931 /*
12932 * Set-up the SAX context
12933 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012934 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012935 if (ctxt->sax != NULL)
12936 xmlFree(ctxt->sax);
12937 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012938 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012939 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012940
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012941 /*
12942 * Canonicalise the system ID
12943 */
12944 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012945 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012946 xmlFreeParserCtxt(ctxt);
12947 return(NULL);
12948 }
Owen Taylor3473f882001-02-23 17:55:21 +000012949
12950 /*
12951 * Ask the Entity resolver to load the damn thing
12952 */
12953
12954 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012955 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12956 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012957 if (input == NULL) {
12958 if (sax != NULL) ctxt->sax = NULL;
12959 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012960 if (systemIdCanonic != NULL)
12961 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012962 return(NULL);
12963 }
12964
12965 /*
12966 * plug some encoding conversion routines here.
12967 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012968 if (xmlPushInput(ctxt, input) < 0) {
12969 if (sax != NULL) ctxt->sax = NULL;
12970 xmlFreeParserCtxt(ctxt);
12971 if (systemIdCanonic != NULL)
12972 xmlFree(systemIdCanonic);
12973 return(NULL);
12974 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012975 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12976 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12977 xmlSwitchEncoding(ctxt, enc);
12978 }
Owen Taylor3473f882001-02-23 17:55:21 +000012979
12980 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012981 input->filename = (char *) systemIdCanonic;
12982 else
12983 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012984 input->line = 1;
12985 input->col = 1;
12986 input->base = ctxt->input->cur;
12987 input->cur = ctxt->input->cur;
12988 input->free = NULL;
12989
12990 /*
12991 * let's parse that entity knowing it's an external subset.
12992 */
12993 ctxt->inSubset = 2;
12994 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012995 if (ctxt->myDoc == NULL) {
12996 xmlErrMemory(ctxt, "New Doc failed");
12997 if (sax != NULL) ctxt->sax = NULL;
12998 xmlFreeParserCtxt(ctxt);
12999 return(NULL);
13000 }
13001 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000013002 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
13003 ExternalID, SystemID);
13004 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
13005
13006 if (ctxt->myDoc != NULL) {
13007 if (ctxt->wellFormed) {
13008 ret = ctxt->myDoc->extSubset;
13009 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000013010 if (ret != NULL) {
13011 xmlNodePtr tmp;
13012
13013 ret->doc = NULL;
13014 tmp = ret->children;
13015 while (tmp != NULL) {
13016 tmp->doc = NULL;
13017 tmp = tmp->next;
13018 }
13019 }
Owen Taylor3473f882001-02-23 17:55:21 +000013020 } else {
13021 ret = NULL;
13022 }
13023 xmlFreeDoc(ctxt->myDoc);
13024 ctxt->myDoc = NULL;
13025 }
13026 if (sax != NULL) ctxt->sax = NULL;
13027 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013028
Owen Taylor3473f882001-02-23 17:55:21 +000013029 return(ret);
13030}
13031
Daniel Veillard4432df22003-09-28 18:58:27 +000013032
Owen Taylor3473f882001-02-23 17:55:21 +000013033/**
13034 * xmlParseDTD:
13035 * @ExternalID: a NAME* containing the External ID of the DTD
13036 * @SystemID: a NAME* containing the URL to the DTD
13037 *
13038 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000013039 *
Owen Taylor3473f882001-02-23 17:55:21 +000013040 * Returns the resulting xmlDtdPtr or NULL in case of error.
13041 */
13042
13043xmlDtdPtr
13044xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
13045 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
13046}
Daniel Veillard4432df22003-09-28 18:58:27 +000013047#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013048
13049/************************************************************************
13050 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013051 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000013052 * *
13053 ************************************************************************/
13054
13055/**
Owen Taylor3473f882001-02-23 17:55:21 +000013056 * xmlParseCtxtExternalEntity:
13057 * @ctx: the existing parsing context
13058 * @URL: the URL for the entity to load
13059 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013060 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013061 *
13062 * Parse an external general entity within an existing parsing context
13063 * An external general parsed entity is well-formed if it matches the
13064 * production labeled extParsedEnt.
13065 *
13066 * [78] extParsedEnt ::= TextDecl? content
13067 *
13068 * Returns 0 if the entity is well formed, -1 in case of args problem and
13069 * the parser error code otherwise
13070 */
13071
13072int
13073xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013074 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000013075 xmlParserCtxtPtr ctxt;
13076 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013077 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013078 xmlSAXHandlerPtr oldsax = NULL;
13079 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013080 xmlChar start[4];
13081 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013082
Daniel Veillardce682bc2004-11-05 17:22:25 +000013083 if (ctx == NULL) return(-1);
13084
Daniel Veillard0161e632008-08-28 15:36:32 +000013085 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
13086 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013087 return(XML_ERR_ENTITY_LOOP);
13088 }
13089
Daniel Veillardcda96922001-08-21 10:56:31 +000013090 if (lst != NULL)
13091 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013092 if ((URL == NULL) && (ID == NULL))
13093 return(-1);
13094 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
13095 return(-1);
13096
Rob Richards798743a2009-06-19 13:54:25 -040013097 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000013098 if (ctxt == NULL) {
13099 return(-1);
13100 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013101
Owen Taylor3473f882001-02-23 17:55:21 +000013102 oldsax = ctxt->sax;
13103 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013104 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013105 newDoc = xmlNewDoc(BAD_CAST "1.0");
13106 if (newDoc == NULL) {
13107 xmlFreeParserCtxt(ctxt);
13108 return(-1);
13109 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013110 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013111 if (ctx->myDoc->dict) {
13112 newDoc->dict = ctx->myDoc->dict;
13113 xmlDictReference(newDoc->dict);
13114 }
Owen Taylor3473f882001-02-23 17:55:21 +000013115 if (ctx->myDoc != NULL) {
13116 newDoc->intSubset = ctx->myDoc->intSubset;
13117 newDoc->extSubset = ctx->myDoc->extSubset;
13118 }
13119 if (ctx->myDoc->URL != NULL) {
13120 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13121 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013122 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13123 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013124 ctxt->sax = oldsax;
13125 xmlFreeParserCtxt(ctxt);
13126 newDoc->intSubset = NULL;
13127 newDoc->extSubset = NULL;
13128 xmlFreeDoc(newDoc);
13129 return(-1);
13130 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013131 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013132 nodePush(ctxt, newDoc->children);
13133 if (ctx->myDoc == NULL) {
13134 ctxt->myDoc = newDoc;
13135 } else {
13136 ctxt->myDoc = ctx->myDoc;
13137 newDoc->children->doc = ctx->myDoc;
13138 }
13139
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013140 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013141 * Get the 4 first bytes and decode the charset
13142 * if enc != XML_CHAR_ENCODING_NONE
13143 * plug some encoding conversion routines.
13144 */
13145 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013146 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13147 start[0] = RAW;
13148 start[1] = NXT(1);
13149 start[2] = NXT(2);
13150 start[3] = NXT(3);
13151 enc = xmlDetectCharEncoding(start, 4);
13152 if (enc != XML_CHAR_ENCODING_NONE) {
13153 xmlSwitchEncoding(ctxt, enc);
13154 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013155 }
13156
Owen Taylor3473f882001-02-23 17:55:21 +000013157 /*
13158 * Parse a possible text declaration first
13159 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013160 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013161 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013162 /*
13163 * An XML-1.0 document can't reference an entity not XML-1.0
13164 */
13165 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13166 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013167 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013168 "Version mismatch between document and entity\n");
13169 }
Owen Taylor3473f882001-02-23 17:55:21 +000013170 }
13171
13172 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080013173 * If the user provided its own SAX callbacks then reuse the
13174 * useData callback field, otherwise the expected setup in a
13175 * DOM builder is to have userData == ctxt
13176 */
13177 if (ctx->userData == ctx)
13178 ctxt->userData = ctxt;
13179 else
13180 ctxt->userData = ctx->userData;
13181
13182 /*
Owen Taylor3473f882001-02-23 17:55:21 +000013183 * Doing validity checking on chunk doesn't make sense
13184 */
13185 ctxt->instate = XML_PARSER_CONTENT;
13186 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013187 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013188 ctxt->loadsubset = ctx->loadsubset;
13189 ctxt->depth = ctx->depth + 1;
13190 ctxt->replaceEntities = ctx->replaceEntities;
13191 if (ctxt->validate) {
13192 ctxt->vctxt.error = ctx->vctxt.error;
13193 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000013194 } else {
13195 ctxt->vctxt.error = NULL;
13196 ctxt->vctxt.warning = NULL;
13197 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000013198 ctxt->vctxt.nodeTab = NULL;
13199 ctxt->vctxt.nodeNr = 0;
13200 ctxt->vctxt.nodeMax = 0;
13201 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013202 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13203 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013204 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13205 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13206 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013207 ctxt->dictNames = ctx->dictNames;
13208 ctxt->attsDefault = ctx->attsDefault;
13209 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000013210 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013211
13212 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013213
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013214 ctx->validate = ctxt->validate;
13215 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013216 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013217 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013218 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013219 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013220 }
13221 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013222 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013223 }
13224
13225 if (!ctxt->wellFormed) {
13226 if (ctxt->errNo == 0)
13227 ret = 1;
13228 else
13229 ret = ctxt->errNo;
13230 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013231 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013232 xmlNodePtr cur;
13233
13234 /*
13235 * Return the newly created nodeset after unlinking it from
13236 * they pseudo parent.
13237 */
13238 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013239 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013240 while (cur != NULL) {
13241 cur->parent = NULL;
13242 cur = cur->next;
13243 }
13244 newDoc->children->children = NULL;
13245 }
13246 ret = 0;
13247 }
13248 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013249 ctxt->dict = NULL;
13250 ctxt->attsDefault = NULL;
13251 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013252 xmlFreeParserCtxt(ctxt);
13253 newDoc->intSubset = NULL;
13254 newDoc->extSubset = NULL;
13255 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013256
Owen Taylor3473f882001-02-23 17:55:21 +000013257 return(ret);
13258}
13259
13260/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013261 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013262 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013263 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013264 * @sax: the SAX handler bloc (possibly NULL)
13265 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13266 * @depth: Used for loop detection, use 0
13267 * @URL: the URL for the entity to load
13268 * @ID: the System ID for the entity to load
13269 * @list: the return value for the set of parsed nodes
13270 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013271 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013272 *
13273 * Returns 0 if the entity is well formed, -1 in case of args problem and
13274 * the parser error code otherwise
13275 */
13276
Daniel Veillard7d515752003-09-26 19:12:37 +000013277static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013278xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13279 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013280 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013281 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013282 xmlParserCtxtPtr ctxt;
13283 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013284 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013285 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013286 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013287 xmlChar start[4];
13288 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013289
Daniel Veillard0161e632008-08-28 15:36:32 +000013290 if (((depth > 40) &&
13291 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13292 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013293 return(XML_ERR_ENTITY_LOOP);
13294 }
13295
Owen Taylor3473f882001-02-23 17:55:21 +000013296 if (list != NULL)
13297 *list = NULL;
13298 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013299 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013300 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013301 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013302
13303
Rob Richards9c0aa472009-03-26 18:10:19 +000013304 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013305 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013306 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013307 if (oldctxt != NULL) {
13308 ctxt->_private = oldctxt->_private;
13309 ctxt->loadsubset = oldctxt->loadsubset;
13310 ctxt->validate = oldctxt->validate;
13311 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013312 ctxt->record_info = oldctxt->record_info;
13313 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13314 ctxt->node_seq.length = oldctxt->node_seq.length;
13315 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013316 } else {
13317 /*
13318 * Doing validity checking on chunk without context
13319 * doesn't make sense
13320 */
13321 ctxt->_private = NULL;
13322 ctxt->validate = 0;
13323 ctxt->external = 2;
13324 ctxt->loadsubset = 0;
13325 }
Owen Taylor3473f882001-02-23 17:55:21 +000013326 if (sax != NULL) {
13327 oldsax = ctxt->sax;
13328 ctxt->sax = sax;
13329 if (user_data != NULL)
13330 ctxt->userData = user_data;
13331 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013332 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013333 newDoc = xmlNewDoc(BAD_CAST "1.0");
13334 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013335 ctxt->node_seq.maximum = 0;
13336 ctxt->node_seq.length = 0;
13337 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013338 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013339 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013340 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013341 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013342 newDoc->intSubset = doc->intSubset;
13343 newDoc->extSubset = doc->extSubset;
13344 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013345 xmlDictReference(newDoc->dict);
13346
Owen Taylor3473f882001-02-23 17:55:21 +000013347 if (doc->URL != NULL) {
13348 newDoc->URL = xmlStrdup(doc->URL);
13349 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013350 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13351 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013352 if (sax != NULL)
13353 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013354 ctxt->node_seq.maximum = 0;
13355 ctxt->node_seq.length = 0;
13356 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013357 xmlFreeParserCtxt(ctxt);
13358 newDoc->intSubset = NULL;
13359 newDoc->extSubset = NULL;
13360 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013361 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013362 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013363 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013364 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013365 ctxt->myDoc = doc;
13366 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013367
Daniel Veillard0161e632008-08-28 15:36:32 +000013368 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013369 * Get the 4 first bytes and decode the charset
13370 * if enc != XML_CHAR_ENCODING_NONE
13371 * plug some encoding conversion routines.
13372 */
13373 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013374 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13375 start[0] = RAW;
13376 start[1] = NXT(1);
13377 start[2] = NXT(2);
13378 start[3] = NXT(3);
13379 enc = xmlDetectCharEncoding(start, 4);
13380 if (enc != XML_CHAR_ENCODING_NONE) {
13381 xmlSwitchEncoding(ctxt, enc);
13382 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013383 }
13384
Owen Taylor3473f882001-02-23 17:55:21 +000013385 /*
13386 * Parse a possible text declaration first
13387 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013388 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013389 xmlParseTextDecl(ctxt);
13390 }
13391
Owen Taylor3473f882001-02-23 17:55:21 +000013392 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013393 ctxt->depth = depth;
13394
13395 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013396
Daniel Veillard561b7f82002-03-20 21:55:57 +000013397 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013398 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013399 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013400 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013401 }
13402 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013403 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013404 }
13405
13406 if (!ctxt->wellFormed) {
13407 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013408 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013409 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013410 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013411 } else {
13412 if (list != NULL) {
13413 xmlNodePtr cur;
13414
13415 /*
13416 * Return the newly created nodeset after unlinking it from
13417 * they pseudo parent.
13418 */
13419 cur = newDoc->children->children;
13420 *list = cur;
13421 while (cur != NULL) {
13422 cur->parent = NULL;
13423 cur = cur->next;
13424 }
13425 newDoc->children->children = NULL;
13426 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013427 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013428 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013429
13430 /*
13431 * Record in the parent context the number of entities replacement
13432 * done when parsing that reference.
13433 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013434 if (oldctxt != NULL)
13435 oldctxt->nbentities += ctxt->nbentities;
13436
Daniel Veillard0161e632008-08-28 15:36:32 +000013437 /*
13438 * Also record the size of the entity parsed
13439 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013440 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013441 oldctxt->sizeentities += ctxt->input->consumed;
13442 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13443 }
13444 /*
13445 * And record the last error if any
13446 */
13447 if (ctxt->lastError.code != XML_ERR_OK)
13448 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13449
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013450 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013451 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013452 if (oldctxt != NULL) {
13453 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13454 oldctxt->node_seq.length = ctxt->node_seq.length;
13455 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13456 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013457 ctxt->node_seq.maximum = 0;
13458 ctxt->node_seq.length = 0;
13459 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013460 xmlFreeParserCtxt(ctxt);
13461 newDoc->intSubset = NULL;
13462 newDoc->extSubset = NULL;
13463 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013464
Owen Taylor3473f882001-02-23 17:55:21 +000013465 return(ret);
13466}
13467
Daniel Veillard81273902003-09-30 00:43:48 +000013468#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013469/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013470 * xmlParseExternalEntity:
13471 * @doc: the document the chunk pertains to
13472 * @sax: the SAX handler bloc (possibly NULL)
13473 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13474 * @depth: Used for loop detection, use 0
13475 * @URL: the URL for the entity to load
13476 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013477 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013478 *
13479 * Parse an external general entity
13480 * An external general parsed entity is well-formed if it matches the
13481 * production labeled extParsedEnt.
13482 *
13483 * [78] extParsedEnt ::= TextDecl? content
13484 *
13485 * Returns 0 if the entity is well formed, -1 in case of args problem and
13486 * the parser error code otherwise
13487 */
13488
13489int
13490xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013491 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013492 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013493 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013494}
13495
13496/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013497 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013498 * @doc: the document the chunk pertains to
13499 * @sax: the SAX handler bloc (possibly NULL)
13500 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13501 * @depth: Used for loop detection, use 0
13502 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013503 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013504 *
13505 * Parse a well-balanced chunk of an XML document
13506 * called by the parser
13507 * The allowed sequence for the Well Balanced Chunk is the one defined by
13508 * the content production in the XML grammar:
13509 *
13510 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13511 *
13512 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13513 * the parser error code otherwise
13514 */
13515
13516int
13517xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013518 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013519 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13520 depth, string, lst, 0 );
13521}
Daniel Veillard81273902003-09-30 00:43:48 +000013522#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013523
13524/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013525 * xmlParseBalancedChunkMemoryInternal:
13526 * @oldctxt: the existing parsing context
13527 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13528 * @user_data: the user data field for the parser context
13529 * @lst: the return value for the set of parsed nodes
13530 *
13531 *
13532 * Parse a well-balanced chunk of an XML document
13533 * called by the parser
13534 * The allowed sequence for the Well Balanced Chunk is the one defined by
13535 * the content production in the XML grammar:
13536 *
13537 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13538 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013539 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13540 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013541 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013542 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013543 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013544 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013545static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013546xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13547 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13548 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013549 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013550 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013551 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013552 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013553 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013554 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013555 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013556#ifdef SAX2
13557 int i;
13558#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013559
Daniel Veillard0161e632008-08-28 15:36:32 +000013560 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13561 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013562 return(XML_ERR_ENTITY_LOOP);
13563 }
13564
13565
13566 if (lst != NULL)
13567 *lst = NULL;
13568 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013569 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013570
13571 size = xmlStrlen(string);
13572
13573 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013574 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013575 if (user_data != NULL)
13576 ctxt->userData = user_data;
13577 else
13578 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013579 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13580 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013581 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13582 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13583 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013584
Daniel Veillard74eaec12009-08-26 15:57:20 +020013585#ifdef SAX2
13586 /* propagate namespaces down the entity */
13587 for (i = 0;i < oldctxt->nsNr;i += 2) {
13588 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13589 }
13590#endif
13591
Daniel Veillard328f48c2002-11-15 15:24:34 +000013592 oldsax = ctxt->sax;
13593 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013594 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013595 ctxt->replaceEntities = oldctxt->replaceEntities;
13596 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013597
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013598 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013599 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013600 newDoc = xmlNewDoc(BAD_CAST "1.0");
13601 if (newDoc == NULL) {
13602 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013603 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013604 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013605 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013606 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013607 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013608 newDoc->dict = ctxt->dict;
13609 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013610 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013611 } else {
13612 ctxt->myDoc = oldctxt->myDoc;
13613 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013614 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013615 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013616 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13617 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013618 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013619 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013620 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013621 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013622 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013623 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013624 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013625 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013626 ctxt->myDoc->children = NULL;
13627 ctxt->myDoc->last = NULL;
13628 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013629 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013630 ctxt->instate = XML_PARSER_CONTENT;
13631 ctxt->depth = oldctxt->depth + 1;
13632
Daniel Veillard328f48c2002-11-15 15:24:34 +000013633 ctxt->validate = 0;
13634 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013635 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13636 /*
13637 * ID/IDREF registration will be done in xmlValidateElement below
13638 */
13639 ctxt->loadsubset |= XML_SKIP_IDS;
13640 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013641 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013642 ctxt->attsDefault = oldctxt->attsDefault;
13643 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013644
Daniel Veillard68e9e742002-11-16 15:35:11 +000013645 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013646 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013647 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013648 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013649 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013650 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013651 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013652 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013653 }
13654
13655 if (!ctxt->wellFormed) {
13656 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013657 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013658 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013659 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013660 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013661 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013662 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013663
William M. Brack7b9154b2003-09-27 19:23:50 +000013664 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013665 xmlNodePtr cur;
13666
13667 /*
13668 * Return the newly created nodeset after unlinking it from
13669 * they pseudo parent.
13670 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013671 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013672 *lst = cur;
13673 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013674#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013675 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13676 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13677 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013678 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13679 oldctxt->myDoc, cur);
13680 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013681#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013682 cur->parent = NULL;
13683 cur = cur->next;
13684 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013685 ctxt->myDoc->children->children = NULL;
13686 }
13687 if (ctxt->myDoc != NULL) {
13688 xmlFreeNode(ctxt->myDoc->children);
13689 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013690 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013691 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013692
13693 /*
13694 * Record in the parent context the number of entities replacement
13695 * done when parsing that reference.
13696 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013697 if (oldctxt != NULL)
13698 oldctxt->nbentities += ctxt->nbentities;
13699
Daniel Veillard0161e632008-08-28 15:36:32 +000013700 /*
13701 * Also record the last error if any
13702 */
13703 if (ctxt->lastError.code != XML_ERR_OK)
13704 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13705
Daniel Veillard328f48c2002-11-15 15:24:34 +000013706 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013707 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013708 ctxt->attsDefault = NULL;
13709 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013710 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013711 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013712 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013713 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013714
Daniel Veillard328f48c2002-11-15 15:24:34 +000013715 return(ret);
13716}
13717
Daniel Veillard29b17482004-08-16 00:39:03 +000013718/**
13719 * xmlParseInNodeContext:
13720 * @node: the context node
13721 * @data: the input string
13722 * @datalen: the input string length in bytes
13723 * @options: a combination of xmlParserOption
13724 * @lst: the return value for the set of parsed nodes
13725 *
13726 * Parse a well-balanced chunk of an XML document
13727 * within the context (DTD, namespaces, etc ...) of the given node.
13728 *
13729 * The allowed sequence for the data is a Well Balanced Chunk defined by
13730 * the content production in the XML grammar:
13731 *
13732 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13733 *
13734 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13735 * error code otherwise
13736 */
13737xmlParserErrors
13738xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13739 int options, xmlNodePtr *lst) {
13740#ifdef SAX2
13741 xmlParserCtxtPtr ctxt;
13742 xmlDocPtr doc = NULL;
13743 xmlNodePtr fake, cur;
13744 int nsnr = 0;
13745
13746 xmlParserErrors ret = XML_ERR_OK;
13747
13748 /*
13749 * check all input parameters, grab the document
13750 */
13751 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13752 return(XML_ERR_INTERNAL_ERROR);
13753 switch (node->type) {
13754 case XML_ELEMENT_NODE:
13755 case XML_ATTRIBUTE_NODE:
13756 case XML_TEXT_NODE:
13757 case XML_CDATA_SECTION_NODE:
13758 case XML_ENTITY_REF_NODE:
13759 case XML_PI_NODE:
13760 case XML_COMMENT_NODE:
13761 case XML_DOCUMENT_NODE:
13762 case XML_HTML_DOCUMENT_NODE:
13763 break;
13764 default:
13765 return(XML_ERR_INTERNAL_ERROR);
13766
13767 }
13768 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13769 (node->type != XML_DOCUMENT_NODE) &&
13770 (node->type != XML_HTML_DOCUMENT_NODE))
13771 node = node->parent;
13772 if (node == NULL)
13773 return(XML_ERR_INTERNAL_ERROR);
13774 if (node->type == XML_ELEMENT_NODE)
13775 doc = node->doc;
13776 else
13777 doc = (xmlDocPtr) node;
13778 if (doc == NULL)
13779 return(XML_ERR_INTERNAL_ERROR);
13780
13781 /*
13782 * allocate a context and set-up everything not related to the
13783 * node position in the tree
13784 */
13785 if (doc->type == XML_DOCUMENT_NODE)
13786 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13787#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013788 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013789 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013790 /*
13791 * When parsing in context, it makes no sense to add implied
13792 * elements like html/body/etc...
13793 */
13794 options |= HTML_PARSE_NOIMPLIED;
13795 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013796#endif
13797 else
13798 return(XML_ERR_INTERNAL_ERROR);
13799
13800 if (ctxt == NULL)
13801 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013802
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013803 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013804 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13805 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13806 * we must wait until the last moment to free the original one.
13807 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013808 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013809 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013810 xmlDictFree(ctxt->dict);
13811 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013812 } else
13813 options |= XML_PARSE_NODICT;
13814
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013815 if (doc->encoding != NULL) {
13816 xmlCharEncodingHandlerPtr hdlr;
13817
13818 if (ctxt->encoding != NULL)
13819 xmlFree((xmlChar *) ctxt->encoding);
13820 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13821
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013822 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013823 if (hdlr != NULL) {
13824 xmlSwitchToEncoding(ctxt, hdlr);
13825 } else {
13826 return(XML_ERR_UNSUPPORTED_ENCODING);
13827 }
13828 }
13829
Daniel Veillard37334572008-07-31 08:20:02 +000013830 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013831 xmlDetectSAX2(ctxt);
13832 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013833 /* parsing in context, i.e. as within existing content */
13834 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013835
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013836 fake = xmlNewComment(NULL);
13837 if (fake == NULL) {
13838 xmlFreeParserCtxt(ctxt);
13839 return(XML_ERR_NO_MEMORY);
13840 }
13841 xmlAddChild(node, fake);
13842
Daniel Veillard29b17482004-08-16 00:39:03 +000013843 if (node->type == XML_ELEMENT_NODE) {
13844 nodePush(ctxt, node);
13845 /*
13846 * initialize the SAX2 namespaces stack
13847 */
13848 cur = node;
13849 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13850 xmlNsPtr ns = cur->nsDef;
13851 const xmlChar *iprefix, *ihref;
13852
13853 while (ns != NULL) {
13854 if (ctxt->dict) {
13855 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13856 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13857 } else {
13858 iprefix = ns->prefix;
13859 ihref = ns->href;
13860 }
13861
13862 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13863 nsPush(ctxt, iprefix, ihref);
13864 nsnr++;
13865 }
13866 ns = ns->next;
13867 }
13868 cur = cur->parent;
13869 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013870 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013871
13872 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13873 /*
13874 * ID/IDREF registration will be done in xmlValidateElement below
13875 */
13876 ctxt->loadsubset |= XML_SKIP_IDS;
13877 }
13878
Daniel Veillard499cc922006-01-18 17:22:35 +000013879#ifdef LIBXML_HTML_ENABLED
13880 if (doc->type == XML_HTML_DOCUMENT_NODE)
13881 __htmlParseContent(ctxt);
13882 else
13883#endif
13884 xmlParseContent(ctxt);
13885
Daniel Veillard29b17482004-08-16 00:39:03 +000013886 nsPop(ctxt, nsnr);
13887 if ((RAW == '<') && (NXT(1) == '/')) {
13888 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13889 } else if (RAW != 0) {
13890 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13891 }
13892 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13893 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13894 ctxt->wellFormed = 0;
13895 }
13896
13897 if (!ctxt->wellFormed) {
13898 if (ctxt->errNo == 0)
13899 ret = XML_ERR_INTERNAL_ERROR;
13900 else
13901 ret = (xmlParserErrors)ctxt->errNo;
13902 } else {
13903 ret = XML_ERR_OK;
13904 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013905
Daniel Veillard29b17482004-08-16 00:39:03 +000013906 /*
13907 * Return the newly created nodeset after unlinking it from
13908 * the pseudo sibling.
13909 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013910
Daniel Veillard29b17482004-08-16 00:39:03 +000013911 cur = fake->next;
13912 fake->next = NULL;
13913 node->last = fake;
13914
13915 if (cur != NULL) {
13916 cur->prev = NULL;
13917 }
13918
13919 *lst = cur;
13920
13921 while (cur != NULL) {
13922 cur->parent = NULL;
13923 cur = cur->next;
13924 }
13925
13926 xmlUnlinkNode(fake);
13927 xmlFreeNode(fake);
13928
13929
13930 if (ret != XML_ERR_OK) {
13931 xmlFreeNodeList(*lst);
13932 *lst = NULL;
13933 }
William M. Brackc3f81342004-10-03 01:22:44 +000013934
William M. Brackb7b54de2004-10-06 16:38:01 +000013935 if (doc->dict != NULL)
13936 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013937 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013938
Daniel Veillard29b17482004-08-16 00:39:03 +000013939 return(ret);
13940#else /* !SAX2 */
13941 return(XML_ERR_INTERNAL_ERROR);
13942#endif
13943}
13944
Daniel Veillard81273902003-09-30 00:43:48 +000013945#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013946/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013947 * xmlParseBalancedChunkMemoryRecover:
13948 * @doc: the document the chunk pertains to
13949 * @sax: the SAX handler bloc (possibly NULL)
13950 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13951 * @depth: Used for loop detection, use 0
13952 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13953 * @lst: the return value for the set of parsed nodes
13954 * @recover: return nodes even if the data is broken (use 0)
13955 *
13956 *
13957 * Parse a well-balanced chunk of an XML document
13958 * called by the parser
13959 * The allowed sequence for the Well Balanced Chunk is the one defined by
13960 * the content production in the XML grammar:
13961 *
13962 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13963 *
13964 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13965 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013966 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013967 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013968 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13969 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013970 */
13971int
13972xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013973 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013974 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013975 xmlParserCtxtPtr ctxt;
13976 xmlDocPtr newDoc;
13977 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013978 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013979 int size;
13980 int ret = 0;
13981
Daniel Veillard0161e632008-08-28 15:36:32 +000013982 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013983 return(XML_ERR_ENTITY_LOOP);
13984 }
13985
13986
Daniel Veillardcda96922001-08-21 10:56:31 +000013987 if (lst != NULL)
13988 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013989 if (string == NULL)
13990 return(-1);
13991
13992 size = xmlStrlen(string);
13993
13994 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13995 if (ctxt == NULL) return(-1);
13996 ctxt->userData = ctxt;
13997 if (sax != NULL) {
13998 oldsax = ctxt->sax;
13999 ctxt->sax = sax;
14000 if (user_data != NULL)
14001 ctxt->userData = user_data;
14002 }
14003 newDoc = xmlNewDoc(BAD_CAST "1.0");
14004 if (newDoc == NULL) {
14005 xmlFreeParserCtxt(ctxt);
14006 return(-1);
14007 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000014008 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014009 if ((doc != NULL) && (doc->dict != NULL)) {
14010 xmlDictFree(ctxt->dict);
14011 ctxt->dict = doc->dict;
14012 xmlDictReference(ctxt->dict);
14013 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
14014 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
14015 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
14016 ctxt->dictNames = 1;
14017 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000014018 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014019 }
Owen Taylor3473f882001-02-23 17:55:21 +000014020 if (doc != NULL) {
14021 newDoc->intSubset = doc->intSubset;
14022 newDoc->extSubset = doc->extSubset;
14023 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014024 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
14025 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000014026 if (sax != NULL)
14027 ctxt->sax = oldsax;
14028 xmlFreeParserCtxt(ctxt);
14029 newDoc->intSubset = NULL;
14030 newDoc->extSubset = NULL;
14031 xmlFreeDoc(newDoc);
14032 return(-1);
14033 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014034 xmlAddChild((xmlNodePtr) newDoc, newRoot);
14035 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000014036 if (doc == NULL) {
14037 ctxt->myDoc = newDoc;
14038 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000014039 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000014040 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000014041 /* Ensure that doc has XML spec namespace */
14042 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
14043 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000014044 }
14045 ctxt->instate = XML_PARSER_CONTENT;
14046 ctxt->depth = depth;
14047
14048 /*
14049 * Doing validity checking on chunk doesn't make sense
14050 */
14051 ctxt->validate = 0;
14052 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014053 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014054
Daniel Veillardb39bc392002-10-26 19:29:51 +000014055 if ( doc != NULL ){
14056 content = doc->children;
14057 doc->children = NULL;
14058 xmlParseContent(ctxt);
14059 doc->children = content;
14060 }
14061 else {
14062 xmlParseContent(ctxt);
14063 }
Owen Taylor3473f882001-02-23 17:55:21 +000014064 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014065 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014066 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014067 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014068 }
14069 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014070 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014071 }
14072
14073 if (!ctxt->wellFormed) {
14074 if (ctxt->errNo == 0)
14075 ret = 1;
14076 else
14077 ret = ctxt->errNo;
14078 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000014079 ret = 0;
14080 }
Daniel Veillard0161e632008-08-28 15:36:32 +000014081
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014082 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
14083 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000014084
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014085 /*
14086 * Return the newly created nodeset after unlinking it from
14087 * they pseudo parent.
14088 */
14089 cur = newDoc->children->children;
14090 *lst = cur;
14091 while (cur != NULL) {
14092 xmlSetTreeDoc(cur, doc);
14093 cur->parent = NULL;
14094 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000014095 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014096 newDoc->children->children = NULL;
14097 }
Daniel Veillard0161e632008-08-28 15:36:32 +000014098
14099 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000014100 ctxt->sax = oldsax;
14101 xmlFreeParserCtxt(ctxt);
14102 newDoc->intSubset = NULL;
14103 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000014104 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014105 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000014106
Owen Taylor3473f882001-02-23 17:55:21 +000014107 return(ret);
14108}
14109
14110/**
14111 * xmlSAXParseEntity:
14112 * @sax: the SAX handler block
14113 * @filename: the filename
14114 *
14115 * parse an XML external entity out of context and build a tree.
14116 * It use the given SAX function block to handle the parsing callback.
14117 * If sax is NULL, fallback to the default DOM tree building routines.
14118 *
14119 * [78] extParsedEnt ::= TextDecl? content
14120 *
14121 * This correspond to a "Well Balanced" chunk
14122 *
14123 * Returns the resulting document tree
14124 */
14125
14126xmlDocPtr
14127xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14128 xmlDocPtr ret;
14129 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014130
14131 ctxt = xmlCreateFileParserCtxt(filename);
14132 if (ctxt == NULL) {
14133 return(NULL);
14134 }
14135 if (sax != NULL) {
14136 if (ctxt->sax != NULL)
14137 xmlFree(ctxt->sax);
14138 ctxt->sax = sax;
14139 ctxt->userData = NULL;
14140 }
14141
Owen Taylor3473f882001-02-23 17:55:21 +000014142 xmlParseExtParsedEnt(ctxt);
14143
14144 if (ctxt->wellFormed)
14145 ret = ctxt->myDoc;
14146 else {
14147 ret = NULL;
14148 xmlFreeDoc(ctxt->myDoc);
14149 ctxt->myDoc = NULL;
14150 }
14151 if (sax != NULL)
14152 ctxt->sax = NULL;
14153 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000014154
Owen Taylor3473f882001-02-23 17:55:21 +000014155 return(ret);
14156}
14157
14158/**
14159 * xmlParseEntity:
14160 * @filename: the filename
14161 *
14162 * parse an XML external entity out of context and build a tree.
14163 *
14164 * [78] extParsedEnt ::= TextDecl? content
14165 *
14166 * This correspond to a "Well Balanced" chunk
14167 *
14168 * Returns the resulting document tree
14169 */
14170
14171xmlDocPtr
14172xmlParseEntity(const char *filename) {
14173 return(xmlSAXParseEntity(NULL, filename));
14174}
Daniel Veillard81273902003-09-30 00:43:48 +000014175#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014176
14177/**
Rob Richards9c0aa472009-03-26 18:10:19 +000014178 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000014179 * @URL: the entity URL
14180 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000014181 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000014182 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000014183 *
14184 * Create a parser context for an external entity
14185 * Automatic support for ZLIB/Compress compressed document is provided
14186 * by default if found at compile-time.
14187 *
14188 * Returns the new parser context or NULL
14189 */
Rob Richards9c0aa472009-03-26 18:10:19 +000014190static xmlParserCtxtPtr
14191xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14192 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000014193 xmlParserCtxtPtr ctxt;
14194 xmlParserInputPtr inputStream;
14195 char *directory = NULL;
14196 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000014197
Owen Taylor3473f882001-02-23 17:55:21 +000014198 ctxt = xmlNewParserCtxt();
14199 if (ctxt == NULL) {
14200 return(NULL);
14201 }
14202
Daniel Veillard48247b42009-07-10 16:12:46 +020014203 if (pctx != NULL) {
14204 ctxt->options = pctx->options;
14205 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000014206 }
14207
Owen Taylor3473f882001-02-23 17:55:21 +000014208 uri = xmlBuildURI(URL, base);
14209
14210 if (uri == NULL) {
14211 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14212 if (inputStream == NULL) {
14213 xmlFreeParserCtxt(ctxt);
14214 return(NULL);
14215 }
14216
14217 inputPush(ctxt, inputStream);
14218
14219 if ((ctxt->directory == NULL) && (directory == NULL))
14220 directory = xmlParserGetDirectory((char *)URL);
14221 if ((ctxt->directory == NULL) && (directory != NULL))
14222 ctxt->directory = directory;
14223 } else {
14224 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14225 if (inputStream == NULL) {
14226 xmlFree(uri);
14227 xmlFreeParserCtxt(ctxt);
14228 return(NULL);
14229 }
14230
14231 inputPush(ctxt, inputStream);
14232
14233 if ((ctxt->directory == NULL) && (directory == NULL))
14234 directory = xmlParserGetDirectory((char *)uri);
14235 if ((ctxt->directory == NULL) && (directory != NULL))
14236 ctxt->directory = directory;
14237 xmlFree(uri);
14238 }
Owen Taylor3473f882001-02-23 17:55:21 +000014239 return(ctxt);
14240}
14241
Rob Richards9c0aa472009-03-26 18:10:19 +000014242/**
14243 * xmlCreateEntityParserCtxt:
14244 * @URL: the entity URL
14245 * @ID: the entity PUBLIC ID
14246 * @base: a possible base for the target URI
14247 *
14248 * Create a parser context for an external entity
14249 * Automatic support for ZLIB/Compress compressed document is provided
14250 * by default if found at compile-time.
14251 *
14252 * Returns the new parser context or NULL
14253 */
14254xmlParserCtxtPtr
14255xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14256 const xmlChar *base) {
14257 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14258
14259}
14260
Owen Taylor3473f882001-02-23 17:55:21 +000014261/************************************************************************
14262 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014263 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014264 * *
14265 ************************************************************************/
14266
14267/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014268 * xmlCreateURLParserCtxt:
14269 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014270 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014271 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014272 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014273 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014274 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014275 *
14276 * Returns the new parser context or NULL
14277 */
14278xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014279xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014280{
14281 xmlParserCtxtPtr ctxt;
14282 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014283 char *directory = NULL;
14284
Owen Taylor3473f882001-02-23 17:55:21 +000014285 ctxt = xmlNewParserCtxt();
14286 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014287 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014288 return(NULL);
14289 }
14290
Daniel Veillarddf292f72005-01-16 19:00:15 +000014291 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014292 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014293 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014294
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014295 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014296 if (inputStream == NULL) {
14297 xmlFreeParserCtxt(ctxt);
14298 return(NULL);
14299 }
14300
Owen Taylor3473f882001-02-23 17:55:21 +000014301 inputPush(ctxt, inputStream);
14302 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014303 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014304 if ((ctxt->directory == NULL) && (directory != NULL))
14305 ctxt->directory = directory;
14306
14307 return(ctxt);
14308}
14309
Daniel Veillard61b93382003-11-03 14:28:31 +000014310/**
14311 * xmlCreateFileParserCtxt:
14312 * @filename: the filename
14313 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014314 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014315 * Automatic support for ZLIB/Compress compressed document is provided
14316 * by default if found at compile-time.
14317 *
14318 * Returns the new parser context or NULL
14319 */
14320xmlParserCtxtPtr
14321xmlCreateFileParserCtxt(const char *filename)
14322{
14323 return(xmlCreateURLParserCtxt(filename, 0));
14324}
14325
Daniel Veillard81273902003-09-30 00:43:48 +000014326#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014327/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014328 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014329 * @sax: the SAX handler block
14330 * @filename: the filename
14331 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14332 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014333 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014334 *
14335 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14336 * compressed document is provided by default if found at compile-time.
14337 * It use the given SAX function block to handle the parsing callback.
14338 * If sax is NULL, fallback to the default DOM tree building routines.
14339 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014340 * User data (void *) is stored within the parser context in the
14341 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014342 *
Owen Taylor3473f882001-02-23 17:55:21 +000014343 * Returns the resulting document tree
14344 */
14345
14346xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014347xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14348 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014349 xmlDocPtr ret;
14350 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014351
Daniel Veillard635ef722001-10-29 11:48:19 +000014352 xmlInitParser();
14353
Owen Taylor3473f882001-02-23 17:55:21 +000014354 ctxt = xmlCreateFileParserCtxt(filename);
14355 if (ctxt == NULL) {
14356 return(NULL);
14357 }
14358 if (sax != NULL) {
14359 if (ctxt->sax != NULL)
14360 xmlFree(ctxt->sax);
14361 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014362 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014363 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014364 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014365 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014366 }
Owen Taylor3473f882001-02-23 17:55:21 +000014367
Daniel Veillard37d2d162008-03-14 10:54:00 +000014368 if (ctxt->directory == NULL)
14369 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014370
Daniel Veillarddad3f682002-11-17 16:47:27 +000014371 ctxt->recovery = recovery;
14372
Owen Taylor3473f882001-02-23 17:55:21 +000014373 xmlParseDocument(ctxt);
14374
William M. Brackc07329e2003-09-08 01:57:30 +000014375 if ((ctxt->wellFormed) || recovery) {
14376 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014377 if (ret != NULL) {
14378 if (ctxt->input->buf->compressed > 0)
14379 ret->compression = 9;
14380 else
14381 ret->compression = ctxt->input->buf->compressed;
14382 }
William M. Brackc07329e2003-09-08 01:57:30 +000014383 }
Owen Taylor3473f882001-02-23 17:55:21 +000014384 else {
14385 ret = NULL;
14386 xmlFreeDoc(ctxt->myDoc);
14387 ctxt->myDoc = NULL;
14388 }
14389 if (sax != NULL)
14390 ctxt->sax = NULL;
14391 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014392
Owen Taylor3473f882001-02-23 17:55:21 +000014393 return(ret);
14394}
14395
14396/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014397 * xmlSAXParseFile:
14398 * @sax: the SAX handler block
14399 * @filename: the filename
14400 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14401 * documents
14402 *
14403 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14404 * compressed document is provided by default if found at compile-time.
14405 * It use the given SAX function block to handle the parsing callback.
14406 * If sax is NULL, fallback to the default DOM tree building routines.
14407 *
14408 * Returns the resulting document tree
14409 */
14410
14411xmlDocPtr
14412xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14413 int recovery) {
14414 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14415}
14416
14417/**
Owen Taylor3473f882001-02-23 17:55:21 +000014418 * xmlRecoverDoc:
14419 * @cur: a pointer to an array of xmlChar
14420 *
14421 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014422 * In the case the document is not Well Formed, a attempt to build a
14423 * tree is tried anyway
14424 *
14425 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014426 */
14427
14428xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014429xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014430 return(xmlSAXParseDoc(NULL, cur, 1));
14431}
14432
14433/**
14434 * xmlParseFile:
14435 * @filename: the filename
14436 *
14437 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14438 * compressed document is provided by default if found at compile-time.
14439 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014440 * Returns the resulting document tree if the file was wellformed,
14441 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014442 */
14443
14444xmlDocPtr
14445xmlParseFile(const char *filename) {
14446 return(xmlSAXParseFile(NULL, filename, 0));
14447}
14448
14449/**
14450 * xmlRecoverFile:
14451 * @filename: the filename
14452 *
14453 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14454 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014455 * In the case the document is not Well Formed, it attempts to build
14456 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014457 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014458 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014459 */
14460
14461xmlDocPtr
14462xmlRecoverFile(const char *filename) {
14463 return(xmlSAXParseFile(NULL, filename, 1));
14464}
14465
14466
14467/**
14468 * xmlSetupParserForBuffer:
14469 * @ctxt: an XML parser context
14470 * @buffer: a xmlChar * buffer
14471 * @filename: a file name
14472 *
14473 * Setup the parser context to parse a new buffer; Clears any prior
14474 * contents from the parser context. The buffer parameter must not be
14475 * NULL, but the filename parameter can be
14476 */
14477void
14478xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14479 const char* filename)
14480{
14481 xmlParserInputPtr input;
14482
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014483 if ((ctxt == NULL) || (buffer == NULL))
14484 return;
14485
Owen Taylor3473f882001-02-23 17:55:21 +000014486 input = xmlNewInputStream(ctxt);
14487 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014488 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014489 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014490 return;
14491 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014492
Owen Taylor3473f882001-02-23 17:55:21 +000014493 xmlClearParserCtxt(ctxt);
14494 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014495 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014496 input->base = buffer;
14497 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014498 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014499 inputPush(ctxt, input);
14500}
14501
14502/**
14503 * xmlSAXUserParseFile:
14504 * @sax: a SAX handler
14505 * @user_data: The user data returned on SAX callbacks
14506 * @filename: a file name
14507 *
14508 * parse an XML file and call the given SAX handler routines.
14509 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014510 *
Owen Taylor3473f882001-02-23 17:55:21 +000014511 * Returns 0 in case of success or a error number otherwise
14512 */
14513int
14514xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14515 const char *filename) {
14516 int ret = 0;
14517 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014518
Owen Taylor3473f882001-02-23 17:55:21 +000014519 ctxt = xmlCreateFileParserCtxt(filename);
14520 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014521 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014522 xmlFree(ctxt->sax);
14523 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014524 xmlDetectSAX2(ctxt);
14525
Owen Taylor3473f882001-02-23 17:55:21 +000014526 if (user_data != NULL)
14527 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014528
Owen Taylor3473f882001-02-23 17:55:21 +000014529 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014530
Owen Taylor3473f882001-02-23 17:55:21 +000014531 if (ctxt->wellFormed)
14532 ret = 0;
14533 else {
14534 if (ctxt->errNo != 0)
14535 ret = ctxt->errNo;
14536 else
14537 ret = -1;
14538 }
14539 if (sax != NULL)
14540 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014541 if (ctxt->myDoc != NULL) {
14542 xmlFreeDoc(ctxt->myDoc);
14543 ctxt->myDoc = NULL;
14544 }
Owen Taylor3473f882001-02-23 17:55:21 +000014545 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014546
Owen Taylor3473f882001-02-23 17:55:21 +000014547 return ret;
14548}
Daniel Veillard81273902003-09-30 00:43:48 +000014549#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014550
14551/************************************************************************
14552 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014553 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014554 * *
14555 ************************************************************************/
14556
14557/**
14558 * xmlCreateMemoryParserCtxt:
14559 * @buffer: a pointer to a char array
14560 * @size: the size of the array
14561 *
14562 * Create a parser context for an XML in-memory document.
14563 *
14564 * Returns the new parser context or NULL
14565 */
14566xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014567xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014568 xmlParserCtxtPtr ctxt;
14569 xmlParserInputPtr input;
14570 xmlParserInputBufferPtr buf;
14571
14572 if (buffer == NULL)
14573 return(NULL);
14574 if (size <= 0)
14575 return(NULL);
14576
14577 ctxt = xmlNewParserCtxt();
14578 if (ctxt == NULL)
14579 return(NULL);
14580
Daniel Veillard53350552003-09-18 13:35:51 +000014581 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014582 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014583 if (buf == NULL) {
14584 xmlFreeParserCtxt(ctxt);
14585 return(NULL);
14586 }
Owen Taylor3473f882001-02-23 17:55:21 +000014587
14588 input = xmlNewInputStream(ctxt);
14589 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014590 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014591 xmlFreeParserCtxt(ctxt);
14592 return(NULL);
14593 }
14594
14595 input->filename = NULL;
14596 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014597 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014598
14599 inputPush(ctxt, input);
14600 return(ctxt);
14601}
14602
Daniel Veillard81273902003-09-30 00:43:48 +000014603#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014604/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014605 * xmlSAXParseMemoryWithData:
14606 * @sax: the SAX handler block
14607 * @buffer: an pointer to a char array
14608 * @size: the size of the array
14609 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14610 * documents
14611 * @data: the userdata
14612 *
14613 * parse an XML in-memory block and use the given SAX function block
14614 * to handle the parsing callback. If sax is NULL, fallback to the default
14615 * DOM tree building routines.
14616 *
14617 * User data (void *) is stored within the parser context in the
14618 * context's _private member, so it is available nearly everywhere in libxml
14619 *
14620 * Returns the resulting document tree
14621 */
14622
14623xmlDocPtr
14624xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14625 int size, int recovery, void *data) {
14626 xmlDocPtr ret;
14627 xmlParserCtxtPtr ctxt;
14628
Daniel Veillardab2a7632009-07-09 08:45:03 +020014629 xmlInitParser();
14630
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014631 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14632 if (ctxt == NULL) return(NULL);
14633 if (sax != NULL) {
14634 if (ctxt->sax != NULL)
14635 xmlFree(ctxt->sax);
14636 ctxt->sax = sax;
14637 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014638 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014639 if (data!=NULL) {
14640 ctxt->_private=data;
14641 }
14642
Daniel Veillardadba5f12003-04-04 16:09:01 +000014643 ctxt->recovery = recovery;
14644
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014645 xmlParseDocument(ctxt);
14646
14647 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14648 else {
14649 ret = NULL;
14650 xmlFreeDoc(ctxt->myDoc);
14651 ctxt->myDoc = NULL;
14652 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014653 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014654 ctxt->sax = NULL;
14655 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014656
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014657 return(ret);
14658}
14659
14660/**
Owen Taylor3473f882001-02-23 17:55:21 +000014661 * xmlSAXParseMemory:
14662 * @sax: the SAX handler block
14663 * @buffer: an pointer to a char array
14664 * @size: the size of the array
14665 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14666 * documents
14667 *
14668 * parse an XML in-memory block and use the given SAX function block
14669 * to handle the parsing callback. If sax is NULL, fallback to the default
14670 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014671 *
Owen Taylor3473f882001-02-23 17:55:21 +000014672 * Returns the resulting document tree
14673 */
14674xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014675xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14676 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014677 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014678}
14679
14680/**
14681 * xmlParseMemory:
14682 * @buffer: an pointer to a char array
14683 * @size: the size of the array
14684 *
14685 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014686 *
Owen Taylor3473f882001-02-23 17:55:21 +000014687 * Returns the resulting document tree
14688 */
14689
Daniel Veillard50822cb2001-07-26 20:05:51 +000014690xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014691 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14692}
14693
14694/**
14695 * xmlRecoverMemory:
14696 * @buffer: an pointer to a char array
14697 * @size: the size of the array
14698 *
14699 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014700 * In the case the document is not Well Formed, an attempt to
14701 * build a tree is tried anyway
14702 *
14703 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014704 */
14705
Daniel Veillard50822cb2001-07-26 20:05:51 +000014706xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014707 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14708}
14709
14710/**
14711 * xmlSAXUserParseMemory:
14712 * @sax: a SAX handler
14713 * @user_data: The user data returned on SAX callbacks
14714 * @buffer: an in-memory XML document input
14715 * @size: the length of the XML document in bytes
14716 *
14717 * A better SAX parsing routine.
14718 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014719 *
Owen Taylor3473f882001-02-23 17:55:21 +000014720 * Returns 0 in case of success or a error number otherwise
14721 */
14722int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014723 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014724 int ret = 0;
14725 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014726
14727 xmlInitParser();
14728
Owen Taylor3473f882001-02-23 17:55:21 +000014729 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14730 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014731 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14732 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014733 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014734 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014735
Daniel Veillard30211a02001-04-26 09:33:18 +000014736 if (user_data != NULL)
14737 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014738
Owen Taylor3473f882001-02-23 17:55:21 +000014739 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014740
Owen Taylor3473f882001-02-23 17:55:21 +000014741 if (ctxt->wellFormed)
14742 ret = 0;
14743 else {
14744 if (ctxt->errNo != 0)
14745 ret = ctxt->errNo;
14746 else
14747 ret = -1;
14748 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014749 if (sax != NULL)
14750 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014751 if (ctxt->myDoc != NULL) {
14752 xmlFreeDoc(ctxt->myDoc);
14753 ctxt->myDoc = NULL;
14754 }
Owen Taylor3473f882001-02-23 17:55:21 +000014755 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014756
Owen Taylor3473f882001-02-23 17:55:21 +000014757 return ret;
14758}
Daniel Veillard81273902003-09-30 00:43:48 +000014759#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014760
14761/**
14762 * xmlCreateDocParserCtxt:
14763 * @cur: a pointer to an array of xmlChar
14764 *
14765 * Creates a parser context for an XML in-memory document.
14766 *
14767 * Returns the new parser context or NULL
14768 */
14769xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014770xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014771 int len;
14772
14773 if (cur == NULL)
14774 return(NULL);
14775 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014776 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014777}
14778
Daniel Veillard81273902003-09-30 00:43:48 +000014779#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014780/**
14781 * xmlSAXParseDoc:
14782 * @sax: the SAX handler block
14783 * @cur: a pointer to an array of xmlChar
14784 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14785 * documents
14786 *
14787 * parse an XML in-memory document and build a tree.
14788 * It use the given SAX function block to handle the parsing callback.
14789 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014790 *
Owen Taylor3473f882001-02-23 17:55:21 +000014791 * Returns the resulting document tree
14792 */
14793
14794xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014795xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014796 xmlDocPtr ret;
14797 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014798 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014799
Daniel Veillard38936062004-11-04 17:45:11 +000014800 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014801
14802
14803 ctxt = xmlCreateDocParserCtxt(cur);
14804 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014805 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014806 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014807 ctxt->sax = sax;
14808 ctxt->userData = NULL;
14809 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014810 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014811
14812 xmlParseDocument(ctxt);
14813 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14814 else {
14815 ret = NULL;
14816 xmlFreeDoc(ctxt->myDoc);
14817 ctxt->myDoc = NULL;
14818 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014819 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014820 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014821 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014822
Owen Taylor3473f882001-02-23 17:55:21 +000014823 return(ret);
14824}
14825
14826/**
14827 * xmlParseDoc:
14828 * @cur: a pointer to an array of xmlChar
14829 *
14830 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014831 *
Owen Taylor3473f882001-02-23 17:55:21 +000014832 * Returns the resulting document tree
14833 */
14834
14835xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014836xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014837 return(xmlSAXParseDoc(NULL, cur, 0));
14838}
Daniel Veillard81273902003-09-30 00:43:48 +000014839#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014840
Daniel Veillard81273902003-09-30 00:43:48 +000014841#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014842/************************************************************************
14843 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014844 * Specific function to keep track of entities references *
14845 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014846 * *
14847 ************************************************************************/
14848
14849static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14850
14851/**
14852 * xmlAddEntityReference:
14853 * @ent : A valid entity
14854 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014855 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014856 *
14857 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14858 */
14859static void
14860xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14861 xmlNodePtr lastNode)
14862{
14863 if (xmlEntityRefFunc != NULL) {
14864 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14865 }
14866}
14867
14868
14869/**
14870 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014871 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014872 *
14873 * Set the function to call call back when a xml reference has been made
14874 */
14875void
14876xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14877{
14878 xmlEntityRefFunc = func;
14879}
Daniel Veillard81273902003-09-30 00:43:48 +000014880#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014881
14882/************************************************************************
14883 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014884 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014885 * *
14886 ************************************************************************/
14887
14888#ifdef LIBXML_XPATH_ENABLED
14889#include <libxml/xpath.h>
14890#endif
14891
Daniel Veillardffa3c742005-07-21 13:24:09 +000014892extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014893static int xmlParserInitialized = 0;
14894
14895/**
14896 * xmlInitParser:
14897 *
14898 * Initialization function for the XML parser.
14899 * This is not reentrant. Call once before processing in case of
14900 * use in multithreaded programs.
14901 */
14902
14903void
14904xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014905 if (xmlParserInitialized != 0)
14906 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014907
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014908#ifdef LIBXML_THREAD_ENABLED
14909 __xmlGlobalInitMutexLock();
14910 if (xmlParserInitialized == 0) {
14911#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014912 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014913 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014914 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14915 (xmlGenericError == NULL))
14916 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014917 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014918 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014919 xmlInitCharEncodingHandlers();
14920 xmlDefaultSAXHandlerInit();
14921 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014922#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014923 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014924#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014925#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014926 htmlInitAutoClose();
14927 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014928#endif
14929#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014930 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014931#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014932 xmlParserInitialized = 1;
14933#ifdef LIBXML_THREAD_ENABLED
14934 }
14935 __xmlGlobalInitMutexUnlock();
14936#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014937}
14938
14939/**
14940 * xmlCleanupParser:
14941 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014942 * This function name is somewhat misleading. It does not clean up
14943 * parser state, it cleans up memory allocated by the library itself.
14944 * It is a cleanup function for the XML library. It tries to reclaim all
14945 * related global memory allocated for the library processing.
14946 * It doesn't deallocate any document related memory. One should
14947 * call xmlCleanupParser() only when the process has finished using
14948 * the library and all XML/HTML documents built with it.
14949 * See also xmlInitParser() which has the opposite function of preparing
14950 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014951 *
14952 * WARNING: if your application is multithreaded or has plugin support
14953 * calling this may crash the application if another thread or
14954 * a plugin is still using libxml2. It's sometimes very hard to
14955 * guess if libxml2 is in use in the application, some libraries
14956 * or plugins may use it without notice. In case of doubt abstain
14957 * from calling this function or do it just before calling exit()
14958 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014959 */
14960
14961void
14962xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014963 if (!xmlParserInitialized)
14964 return;
14965
Owen Taylor3473f882001-02-23 17:55:21 +000014966 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014967#ifdef LIBXML_CATALOG_ENABLED
14968 xmlCatalogCleanup();
14969#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014970 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014971 xmlCleanupInputCallbacks();
14972#ifdef LIBXML_OUTPUT_ENABLED
14973 xmlCleanupOutputCallbacks();
14974#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014975#ifdef LIBXML_SCHEMAS_ENABLED
14976 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014977 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014978#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014979 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014980 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014981 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014982 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014983 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014984}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014985
14986/************************************************************************
14987 * *
14988 * New set (2.6.0) of simpler and more flexible APIs *
14989 * *
14990 ************************************************************************/
14991
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement (safe in unbraced if/else bodies); the caller supplies
 * the terminating semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
15003
15004/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015005 * xmlCtxtReset:
15006 * @ctxt: an XML parser context
15007 *
15008 * Reset a parser context
15009 */
15010void
15011xmlCtxtReset(xmlParserCtxtPtr ctxt)
15012{
15013 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015014 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015015
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015016 if (ctxt == NULL)
15017 return;
15018
15019 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015020
15021 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
15022 xmlFreeInputStream(input);
15023 }
15024 ctxt->inputNr = 0;
15025 ctxt->input = NULL;
15026
15027 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000015028 if (ctxt->spaceTab != NULL) {
15029 ctxt->spaceTab[0] = -1;
15030 ctxt->space = &ctxt->spaceTab[0];
15031 } else {
15032 ctxt->space = NULL;
15033 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015034
15035
15036 ctxt->nodeNr = 0;
15037 ctxt->node = NULL;
15038
15039 ctxt->nameNr = 0;
15040 ctxt->name = NULL;
15041
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015042 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015043 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015044 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015045 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015046 DICT_FREE(ctxt->directory);
15047 ctxt->directory = NULL;
15048 DICT_FREE(ctxt->extSubURI);
15049 ctxt->extSubURI = NULL;
15050 DICT_FREE(ctxt->extSubSystem);
15051 ctxt->extSubSystem = NULL;
15052 if (ctxt->myDoc != NULL)
15053 xmlFreeDoc(ctxt->myDoc);
15054 ctxt->myDoc = NULL;
15055
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015056 ctxt->standalone = -1;
15057 ctxt->hasExternalSubset = 0;
15058 ctxt->hasPErefs = 0;
15059 ctxt->html = 0;
15060 ctxt->external = 0;
15061 ctxt->instate = XML_PARSER_START;
15062 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015063
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015064 ctxt->wellFormed = 1;
15065 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000015066 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015067 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000015068#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015069 ctxt->vctxt.userData = ctxt;
15070 ctxt->vctxt.error = xmlParserValidityError;
15071 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000015072#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015073 ctxt->record_info = 0;
15074 ctxt->nbChars = 0;
15075 ctxt->checkIndex = 0;
15076 ctxt->inSubset = 0;
15077 ctxt->errNo = XML_ERR_OK;
15078 ctxt->depth = 0;
15079 ctxt->charset = XML_CHAR_ENCODING_UTF8;
15080 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000015081 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000015082 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080015083 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015084 xmlInitNodeInfoSeq(&ctxt->node_seq);
15085
15086 if (ctxt->attsDefault != NULL) {
15087 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
15088 ctxt->attsDefault = NULL;
15089 }
15090 if (ctxt->attsSpecial != NULL) {
15091 xmlHashFree(ctxt->attsSpecial, NULL);
15092 ctxt->attsSpecial = NULL;
15093 }
15094
Daniel Veillard4432df22003-09-28 18:58:27 +000015095#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015096 if (ctxt->catalogs != NULL)
15097 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000015098#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000015099 if (ctxt->lastError.code != XML_ERR_OK)
15100 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015101}
15102
15103/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015104 * xmlCtxtResetPush:
15105 * @ctxt: an XML parser context
15106 * @chunk: a pointer to an array of chars
15107 * @size: number of chars in the array
15108 * @filename: an optional file name or URI
15109 * @encoding: the document encoding, or NULL
15110 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000015111 * Reset a push parser context
15112 *
15113 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015114 */
15115int
15116xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15117 int size, const char *filename, const char *encoding)
15118{
15119 xmlParserInputPtr inputStream;
15120 xmlParserInputBufferPtr buf;
15121 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15122
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000015123 if (ctxt == NULL)
15124 return(1);
15125
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015126 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15127 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15128
15129 buf = xmlAllocParserInputBuffer(enc);
15130 if (buf == NULL)
15131 return(1);
15132
15133 if (ctxt == NULL) {
15134 xmlFreeParserInputBuffer(buf);
15135 return(1);
15136 }
15137
15138 xmlCtxtReset(ctxt);
15139
15140 if (ctxt->pushTab == NULL) {
15141 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15142 sizeof(xmlChar *));
15143 if (ctxt->pushTab == NULL) {
15144 xmlErrMemory(ctxt, NULL);
15145 xmlFreeParserInputBuffer(buf);
15146 return(1);
15147 }
15148 }
15149
15150 if (filename == NULL) {
15151 ctxt->directory = NULL;
15152 } else {
15153 ctxt->directory = xmlParserGetDirectory(filename);
15154 }
15155
15156 inputStream = xmlNewInputStream(ctxt);
15157 if (inputStream == NULL) {
15158 xmlFreeParserInputBuffer(buf);
15159 return(1);
15160 }
15161
15162 if (filename == NULL)
15163 inputStream->filename = NULL;
15164 else
15165 inputStream->filename = (char *)
15166 xmlCanonicPath((const xmlChar *) filename);
15167 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080015168 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015169
15170 inputPush(ctxt, inputStream);
15171
15172 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15173 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015174 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15175 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015176
15177 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15178
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015179 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015180#ifdef DEBUG_PUSH
15181 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15182#endif
15183 }
15184
15185 if (encoding != NULL) {
15186 xmlCharEncodingHandlerPtr hdlr;
15187
Daniel Veillard37334572008-07-31 08:20:02 +000015188 if (ctxt->encoding != NULL)
15189 xmlFree((xmlChar *) ctxt->encoding);
15190 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15191
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015192 hdlr = xmlFindCharEncodingHandler(encoding);
15193 if (hdlr != NULL) {
15194 xmlSwitchToEncoding(ctxt, hdlr);
15195 } else {
15196 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15197 "Unsupported encoding %s\n", BAD_CAST encoding);
15198 }
15199 } else if (enc != XML_CHAR_ENCODING_NONE) {
15200 xmlSwitchEncoding(ctxt, enc);
15201 }
15202
15203 return(0);
15204}
15205
Daniel Veillard37334572008-07-31 08:20:02 +000015206
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015207/**
Daniel Veillard37334572008-07-31 08:20:02 +000015208 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015209 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015210 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015211 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015212 *
15213 * Applies the options to the parser context
15214 *
15215 * Returns 0 in case of success, the set of unknown or unimplemented options
15216 * in case of error.
15217 */
Daniel Veillard37334572008-07-31 08:20:02 +000015218static int
15219xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015220{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015221 if (ctxt == NULL)
15222 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015223 if (encoding != NULL) {
15224 if (ctxt->encoding != NULL)
15225 xmlFree((xmlChar *) ctxt->encoding);
15226 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15227 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015228 if (options & XML_PARSE_RECOVER) {
15229 ctxt->recovery = 1;
15230 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015231 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015232 } else
15233 ctxt->recovery = 0;
15234 if (options & XML_PARSE_DTDLOAD) {
15235 ctxt->loadsubset = XML_DETECT_IDS;
15236 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015237 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015238 } else
15239 ctxt->loadsubset = 0;
15240 if (options & XML_PARSE_DTDATTR) {
15241 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15242 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015243 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015244 }
15245 if (options & XML_PARSE_NOENT) {
15246 ctxt->replaceEntities = 1;
15247 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15248 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015249 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015250 } else
15251 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015252 if (options & XML_PARSE_PEDANTIC) {
15253 ctxt->pedantic = 1;
15254 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015255 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015256 } else
15257 ctxt->pedantic = 0;
15258 if (options & XML_PARSE_NOBLANKS) {
15259 ctxt->keepBlanks = 0;
15260 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15261 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015262 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015263 } else
15264 ctxt->keepBlanks = 1;
15265 if (options & XML_PARSE_DTDVALID) {
15266 ctxt->validate = 1;
15267 if (options & XML_PARSE_NOWARNING)
15268 ctxt->vctxt.warning = NULL;
15269 if (options & XML_PARSE_NOERROR)
15270 ctxt->vctxt.error = NULL;
15271 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015272 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015273 } else
15274 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015275 if (options & XML_PARSE_NOWARNING) {
15276 ctxt->sax->warning = NULL;
15277 options -= XML_PARSE_NOWARNING;
15278 }
15279 if (options & XML_PARSE_NOERROR) {
15280 ctxt->sax->error = NULL;
15281 ctxt->sax->fatalError = NULL;
15282 options -= XML_PARSE_NOERROR;
15283 }
Daniel Veillard81273902003-09-30 00:43:48 +000015284#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015285 if (options & XML_PARSE_SAX1) {
15286 ctxt->sax->startElement = xmlSAX2StartElement;
15287 ctxt->sax->endElement = xmlSAX2EndElement;
15288 ctxt->sax->startElementNs = NULL;
15289 ctxt->sax->endElementNs = NULL;
15290 ctxt->sax->initialized = 1;
15291 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015292 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015293 }
Daniel Veillard81273902003-09-30 00:43:48 +000015294#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015295 if (options & XML_PARSE_NODICT) {
15296 ctxt->dictNames = 0;
15297 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015298 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015299 } else {
15300 ctxt->dictNames = 1;
15301 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015302 if (options & XML_PARSE_NOCDATA) {
15303 ctxt->sax->cdataBlock = NULL;
15304 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015305 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015306 }
15307 if (options & XML_PARSE_NSCLEAN) {
15308 ctxt->options |= XML_PARSE_NSCLEAN;
15309 options -= XML_PARSE_NSCLEAN;
15310 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015311 if (options & XML_PARSE_NONET) {
15312 ctxt->options |= XML_PARSE_NONET;
15313 options -= XML_PARSE_NONET;
15314 }
Doran Moppert23040782017-04-07 16:45:56 +020015315 if (options & XML_PARSE_NOXXE) {
15316 ctxt->options |= XML_PARSE_NOXXE;
15317 options -= XML_PARSE_NOXXE;
15318 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015319 if (options & XML_PARSE_COMPACT) {
15320 ctxt->options |= XML_PARSE_COMPACT;
15321 options -= XML_PARSE_COMPACT;
15322 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015323 if (options & XML_PARSE_OLD10) {
15324 ctxt->options |= XML_PARSE_OLD10;
15325 options -= XML_PARSE_OLD10;
15326 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015327 if (options & XML_PARSE_NOBASEFIX) {
15328 ctxt->options |= XML_PARSE_NOBASEFIX;
15329 options -= XML_PARSE_NOBASEFIX;
15330 }
15331 if (options & XML_PARSE_HUGE) {
15332 ctxt->options |= XML_PARSE_HUGE;
15333 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015334 if (ctxt->dict != NULL)
15335 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015336 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015337 if (options & XML_PARSE_OLDSAX) {
15338 ctxt->options |= XML_PARSE_OLDSAX;
15339 options -= XML_PARSE_OLDSAX;
15340 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015341 if (options & XML_PARSE_IGNORE_ENC) {
15342 ctxt->options |= XML_PARSE_IGNORE_ENC;
15343 options -= XML_PARSE_IGNORE_ENC;
15344 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015345 if (options & XML_PARSE_BIG_LINES) {
15346 ctxt->options |= XML_PARSE_BIG_LINES;
15347 options -= XML_PARSE_BIG_LINES;
15348 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015349 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015350 return (options);
15351}
15352
15353/**
Daniel Veillard37334572008-07-31 08:20:02 +000015354 * xmlCtxtUseOptions:
15355 * @ctxt: an XML parser context
15356 * @options: a combination of xmlParserOption
15357 *
15358 * Applies the options to the parser context
15359 *
15360 * Returns 0 in case of success, the set of unknown or unimplemented options
15361 * in case of error.
15362 */
15363int
15364xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15365{
15366 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15367}
15368
15369/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015370 * xmlDoRead:
15371 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015372 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015373 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015374 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015375 * @reuse: keep the context for reuse
15376 *
15377 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015378 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015379 * Returns the resulting document tree or NULL
15380 */
15381static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015382xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15383 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015384{
15385 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015386
15387 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015388 if (encoding != NULL) {
15389 xmlCharEncodingHandlerPtr hdlr;
15390
15391 hdlr = xmlFindCharEncodingHandler(encoding);
15392 if (hdlr != NULL)
15393 xmlSwitchToEncoding(ctxt, hdlr);
15394 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015395 if ((URL != NULL) && (ctxt->input != NULL) &&
15396 (ctxt->input->filename == NULL))
15397 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015398 xmlParseDocument(ctxt);
15399 if ((ctxt->wellFormed) || ctxt->recovery)
15400 ret = ctxt->myDoc;
15401 else {
15402 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015403 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015404 xmlFreeDoc(ctxt->myDoc);
15405 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015406 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015407 ctxt->myDoc = NULL;
15408 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015409 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015410 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015411
15412 return (ret);
15413}
15414
15415/**
15416 * xmlReadDoc:
15417 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015418 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015419 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015420 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015421 *
15422 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015423 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015424 * Returns the resulting document tree
15425 */
15426xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015427xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015428{
15429 xmlParserCtxtPtr ctxt;
15430
15431 if (cur == NULL)
15432 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015433 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015434
15435 ctxt = xmlCreateDocParserCtxt(cur);
15436 if (ctxt == NULL)
15437 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015438 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015439}
15440
15441/**
15442 * xmlReadFile:
15443 * @filename: a file or URL
15444 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015445 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015446 *
15447 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015448 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015449 * Returns the resulting document tree
15450 */
15451xmlDocPtr
15452xmlReadFile(const char *filename, const char *encoding, int options)
15453{
15454 xmlParserCtxtPtr ctxt;
15455
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015456 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015457 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015458 if (ctxt == NULL)
15459 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015460 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015461}
15462
15463/**
15464 * xmlReadMemory:
15465 * @buffer: a pointer to a char array
15466 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015467 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015468 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015469 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015470 *
15471 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015472 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015473 * Returns the resulting document tree
15474 */
15475xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015476xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015477{
15478 xmlParserCtxtPtr ctxt;
15479
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015480 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015481 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15482 if (ctxt == NULL)
15483 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015484 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015485}
15486
15487/**
15488 * xmlReadFd:
15489 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015490 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015491 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015492 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015493 *
15494 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015495 * NOTE that the file descriptor will not be closed when the
15496 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015497 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015498 * Returns the resulting document tree
15499 */
15500xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015501xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015502{
15503 xmlParserCtxtPtr ctxt;
15504 xmlParserInputBufferPtr input;
15505 xmlParserInputPtr stream;
15506
15507 if (fd < 0)
15508 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015509 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015510
15511 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15512 if (input == NULL)
15513 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015514 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015515 ctxt = xmlNewParserCtxt();
15516 if (ctxt == NULL) {
15517 xmlFreeParserInputBuffer(input);
15518 return (NULL);
15519 }
15520 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15521 if (stream == NULL) {
15522 xmlFreeParserInputBuffer(input);
15523 xmlFreeParserCtxt(ctxt);
15524 return (NULL);
15525 }
15526 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015527 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015528}
15529
15530/**
15531 * xmlReadIO:
15532 * @ioread: an I/O read function
15533 * @ioclose: an I/O close function
15534 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015535 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015536 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015537 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015538 *
15539 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015540 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015541 * Returns the resulting document tree
15542 */
15543xmlDocPtr
15544xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015545 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015546{
15547 xmlParserCtxtPtr ctxt;
15548 xmlParserInputBufferPtr input;
15549 xmlParserInputPtr stream;
15550
15551 if (ioread == NULL)
15552 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015553 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015554
15555 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15556 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015557 if (input == NULL) {
15558 if (ioclose != NULL)
15559 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015560 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015561 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015562 ctxt = xmlNewParserCtxt();
15563 if (ctxt == NULL) {
15564 xmlFreeParserInputBuffer(input);
15565 return (NULL);
15566 }
15567 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15568 if (stream == NULL) {
15569 xmlFreeParserInputBuffer(input);
15570 xmlFreeParserCtxt(ctxt);
15571 return (NULL);
15572 }
15573 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015574 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015575}
15576
15577/**
15578 * xmlCtxtReadDoc:
15579 * @ctxt: an XML parser context
15580 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015581 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015582 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015583 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015584 *
15585 * parse an XML in-memory document and build a tree.
15586 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015587 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015588 * Returns the resulting document tree
15589 */
15590xmlDocPtr
15591xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015592 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015593{
15594 xmlParserInputPtr stream;
15595
15596 if (cur == NULL)
15597 return (NULL);
15598 if (ctxt == NULL)
15599 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015600 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015601
15602 xmlCtxtReset(ctxt);
15603
15604 stream = xmlNewStringInputStream(ctxt, cur);
15605 if (stream == NULL) {
15606 return (NULL);
15607 }
15608 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015609 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015610}
15611
15612/**
15613 * xmlCtxtReadFile:
15614 * @ctxt: an XML parser context
15615 * @filename: a file or URL
15616 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015617 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015618 *
15619 * parse an XML file from the filesystem or the network.
15620 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015621 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015622 * Returns the resulting document tree
15623 */
15624xmlDocPtr
15625xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15626 const char *encoding, int options)
15627{
15628 xmlParserInputPtr stream;
15629
15630 if (filename == NULL)
15631 return (NULL);
15632 if (ctxt == NULL)
15633 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015634 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015635
15636 xmlCtxtReset(ctxt);
15637
Daniel Veillard29614c72004-11-26 10:47:26 +000015638 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015639 if (stream == NULL) {
15640 return (NULL);
15641 }
15642 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015643 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015644}
15645
15646/**
15647 * xmlCtxtReadMemory:
15648 * @ctxt: an XML parser context
15649 * @buffer: a pointer to a char array
15650 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015651 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015652 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015653 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015654 *
15655 * parse an XML in-memory document and build a tree.
15656 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015657 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015658 * Returns the resulting document tree
15659 */
15660xmlDocPtr
15661xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015662 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015663{
15664 xmlParserInputBufferPtr input;
15665 xmlParserInputPtr stream;
15666
15667 if (ctxt == NULL)
15668 return (NULL);
15669 if (buffer == NULL)
15670 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015671 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015672
15673 xmlCtxtReset(ctxt);
15674
15675 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15676 if (input == NULL) {
15677 return(NULL);
15678 }
15679
15680 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15681 if (stream == NULL) {
15682 xmlFreeParserInputBuffer(input);
15683 return(NULL);
15684 }
15685
15686 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015687 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015688}
15689
15690/**
15691 * xmlCtxtReadFd:
15692 * @ctxt: an XML parser context
15693 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015694 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015695 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015696 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015697 *
15698 * parse an XML from a file descriptor and build a tree.
15699 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015700 * NOTE that the file descriptor will not be closed when the
15701 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015702 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015703 * Returns the resulting document tree
15704 */
15705xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015706xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15707 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015708{
15709 xmlParserInputBufferPtr input;
15710 xmlParserInputPtr stream;
15711
15712 if (fd < 0)
15713 return (NULL);
15714 if (ctxt == NULL)
15715 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015716 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015717
15718 xmlCtxtReset(ctxt);
15719
15720
15721 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15722 if (input == NULL)
15723 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015724 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015725 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15726 if (stream == NULL) {
15727 xmlFreeParserInputBuffer(input);
15728 return (NULL);
15729 }
15730 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015731 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015732}
15733
15734/**
15735 * xmlCtxtReadIO:
15736 * @ctxt: an XML parser context
15737 * @ioread: an I/O read function
15738 * @ioclose: an I/O close function
15739 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015740 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015741 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015742 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015743 *
15744 * parse an XML document from I/O functions and source and build a tree.
15745 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015746 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015747 * Returns the resulting document tree
15748 */
15749xmlDocPtr
15750xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15751 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015752 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015753 const char *encoding, int options)
15754{
15755 xmlParserInputBufferPtr input;
15756 xmlParserInputPtr stream;
15757
15758 if (ioread == NULL)
15759 return (NULL);
15760 if (ctxt == NULL)
15761 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015762 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015763
15764 xmlCtxtReset(ctxt);
15765
15766 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15767 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015768 if (input == NULL) {
15769 if (ioclose != NULL)
15770 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015771 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015772 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015773 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15774 if (stream == NULL) {
15775 xmlFreeParserInputBuffer(input);
15776 return (NULL);
15777 }
15778 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015779 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015780}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015781
15782#define bottom_parser
15783#include "elfgcchack.h"