blob: 6286cad6196add0d1d0f068b28e68f89fd82207b [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
Daniel Veillardf8e3db02012-09-11 13:26:36 +080020 * different ranges of character are actually implemented either in
Owen Taylor3473f882001-02-23 17:55:21 +000021 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Nick Wellnhofer855c19e2017-06-01 01:04:08 +020046#include <stddef.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000048#include <libxml/threads.h>
49#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000050#include <libxml/tree.h>
51#include <libxml/parser.h>
52#include <libxml/parserInternals.h>
53#include <libxml/valid.h>
54#include <libxml/entities.h>
55#include <libxml/xmlerror.h>
56#include <libxml/encoding.h>
57#include <libxml/xmlIO.h>
58#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000059#ifdef LIBXML_CATALOG_ENABLED
60#include <libxml/catalog.h>
61#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000062#ifdef LIBXML_SCHEMAS_ENABLED
63#include <libxml/xmlschemastypes.h>
64#include <libxml/relaxng.h>
65#endif
Owen Taylor3473f882001-02-23 17:55:21 +000066#ifdef HAVE_CTYPE_H
67#include <ctype.h>
68#endif
69#ifdef HAVE_STDLIB_H
70#include <stdlib.h>
71#endif
72#ifdef HAVE_SYS_STAT_H
73#include <sys/stat.h>
74#endif
75#ifdef HAVE_FCNTL_H
76#include <fcntl.h>
77#endif
78#ifdef HAVE_UNISTD_H
79#include <unistd.h>
80#endif
81#ifdef HAVE_ZLIB_H
82#include <zlib.h>
83#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020084#ifdef HAVE_LZMA_H
85#include <lzma.h>
86#endif
Owen Taylor3473f882001-02-23 17:55:21 +000087
Daniel Veillard768eb3b2012-07-16 14:19:49 +080088#include "buf.h"
89#include "enc.h"
90
Daniel Veillard0161e632008-08-28 15:36:32 +000091static void
92xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
93
Rob Richards9c0aa472009-03-26 18:10:19 +000094static xmlParserCtxtPtr
95xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
96 const xmlChar *base, xmlParserCtxtPtr pctx);
97
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080098static void xmlHaltParser(xmlParserCtxtPtr ctxt);
99
Daniel Veillard0161e632008-08-28 15:36:32 +0000100/************************************************************************
101 * *
102 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
103 * *
104 ************************************************************************/
105
106#define XML_PARSER_BIG_ENTITY 1000
107#define XML_PARSER_LOT_ENTITY 5000
108
109/*
110 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
111 * replacement over the size in byte of the input indicates that you have
112 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
113 * replacements per byte of input.
114 */
115#define XML_PARSER_NON_LINEAR 10
116
117/*
118 * xmlParserEntityCheck
119 *
120 * Function to check non-linear entity expansion behaviour
121 * This is here to detect and stop exponential linear entity expansion
122 * This is not a limitation of the parser but a safety
123 * boundary feature. It can be disabled with the XML_PARSE_HUGE
124 * parser option.
125 */
126static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800127xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800128 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000129{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800130 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000131
132 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
133 return (0);
134 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
135 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800136
137 /*
138 * This may look absurd but is needed to detect
139 * entities problems
140 */
141 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillardbdd66182016-05-23 12:27:58 +0800142 (ent->content != NULL) && (ent->checked == 0) &&
143 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800144 unsigned long oldnbent = ctxt->nbentities;
145 xmlChar *rep;
146
147 ent->checked = 1;
148
Peter Simons8f30bdf2016-04-15 11:56:55 +0200149 ++ctxt->depth;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800150 rep = xmlStringDecodeEntities(ctxt, ent->content,
151 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +0200152 --ctxt->depth;
Daniel Veillardbdd66182016-05-23 12:27:58 +0800153 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
154 ent->content[0] = 0;
155 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800156
157 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
158 if (rep != NULL) {
159 if (xmlStrchr(rep, '<'))
160 ent->checked |= 1;
161 xmlFree(rep);
162 rep = NULL;
163 }
164 }
Daniel Veillard23f05e02013-02-19 10:21:49 +0800165 if (replacement != 0) {
166 if (replacement < XML_MAX_TEXT_LENGTH)
167 return(0);
168
169 /*
170 * If the volume of entity copy reaches 10 times the
171 * amount of parsed data and over the large text threshold
172 * then that's very likely to be an abuse.
173 */
174 if (ctxt->input != NULL) {
175 consumed = ctxt->input->consumed +
176 (ctxt->input->cur - ctxt->input->base);
177 }
178 consumed += ctxt->sizeentities;
179
180 if (replacement < XML_PARSER_NON_LINEAR * consumed)
181 return(0);
182 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000183 /*
184 * Do the check based on the replacement size of the entity
185 */
186 if (size < XML_PARSER_BIG_ENTITY)
187 return(0);
188
189 /*
190 * A limit on the amount of text data reasonably used
191 */
192 if (ctxt->input != NULL) {
193 consumed = ctxt->input->consumed +
194 (ctxt->input->cur - ctxt->input->base);
195 }
196 consumed += ctxt->sizeentities;
197
198 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
199 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
200 return (0);
201 } else if (ent != NULL) {
202 /*
203 * use the number of parsed entities in the replacement
204 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800205 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000206
207 /*
208 * The amount of data parsed counting entities size only once
209 */
210 if (ctxt->input != NULL) {
211 consumed = ctxt->input->consumed +
212 (ctxt->input->cur - ctxt->input->base);
213 }
214 consumed += ctxt->sizeentities;
215
216 /*
217 * Check the density of entities for the amount of data
218 * knowing an entity reference will take at least 3 bytes
219 */
220 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
221 return (0);
222 } else {
223 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800224 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000225 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800226 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
227 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
228 (ctxt->nbentities <= 10000))
229 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000230 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000231 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
232 return (1);
233}
234
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process; the default value is 256. This is not a limitation of the
 * parser but a safety boundary feature. It can be disabled with the
 * XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000244
Daniel Veillard0fb18932003-09-07 09:14:37 +0000245
Daniel Veillard0161e632008-08-28 15:36:32 +0000246
247#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000248#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000249#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000250#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
251
Daniel Veillard1f972e92012-08-15 10:16:37 +0800252/**
253 * XML_PARSER_CHUNK_SIZE
254 *
255 * When calling GROW that's the minimal amount of data
256 * the parser expects to have received. It is not a hard
257 * limit but an optimization when reading strings like Names.
258 * It is not strictly needed as long as the input's available characters
259 * are followed by 0, which should be provided by the I/O level
260 */
261#define XML_PARSER_CHUNK_SIZE 100
262
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * array slots are const: this is a fixed lookup table that is only
 * meant to be read.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
272
Daniel Veillarda07050d2003-10-19 14:46:32 +0000273
Owen Taylor3473f882001-02-23 17:55:21 +0000274/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200275static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
276 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000277
Daniel Veillard7d515752003-09-26 19:12:37 +0000278static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000279xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
280 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000281 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000282 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000283
Daniel Veillard37334572008-07-31 08:20:02 +0000284static int
285xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
286 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000287#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000288static void
289xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
290 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000291#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000292
Daniel Veillard7d515752003-09-26 19:12:37 +0000293static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000294xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
295 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000296
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000297static int
298xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
299
Daniel Veillarde57ec792003-09-10 10:50:59 +0000300/************************************************************************
301 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800302 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 * *
304 ************************************************************************/
305
306/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000307 * xmlErrAttributeDup:
308 * @ctxt: an XML parser context
309 * @prefix: the attribute prefix
310 * @localname: the attribute localname
311 *
312 * Handle a redefinition of attribute error
313 */
314static void
315xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
316 const xmlChar * localname)
317{
Daniel Veillard157fee02003-10-31 10:36:03 +0000318 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
319 (ctxt->instate == XML_PARSER_EOF))
320 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000321 if (ctxt != NULL)
322 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200323
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000324 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000325 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200326 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 (const char *) localname, NULL, NULL, 0, 0,
328 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000329 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000330 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200331 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000332 (const char *) prefix, (const char *) localname,
333 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
334 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000335 if (ctxt != NULL) {
336 ctxt->wellFormed = 0;
337 if (ctxt->recovery == 0)
338 ctxt->disableSAX = 1;
339 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340}
341
342/**
343 * xmlFatalErr:
344 * @ctxt: an XML parser context
345 * @error: the error number
346 * @extra: extra information string
347 *
348 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
349 */
350static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352{
353 const char *errmsg;
354
Daniel Veillard157fee02003-10-31 10:36:03 +0000355 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
356 (ctxt->instate == XML_PARSER_EOF))
357 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 switch (error) {
359 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800360 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000362 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800363 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000365 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800366 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000368 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 errmsg = "internal error";
370 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000371 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800372 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000373 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000374 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800375 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000377 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800378 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000379 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000380 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800381 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000382 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000383 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800384 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000386 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800387 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000389 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800390 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000392 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800393 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000394 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000395 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800396 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000397 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000398 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800399 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000400 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000401 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800402 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000403 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000404 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800405 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000406 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000407 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800408 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000409 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000410 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800411 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000412 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000413 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800414 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000415 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000416 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800417 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000418 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000419 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800420 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000421 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000422 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800423 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000424 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000425 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800426 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800429 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800432 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800435 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 errmsg = "Fragment not allowed";
439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800441 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800444 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800447 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800450 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800453 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000454 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000455 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800456 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800459 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000460 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000461 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000462 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800463 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800466 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000468 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800469 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800472 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000473 break;
474 case XML_ERR_CONDSEC_INVALID_KEYWORD:
475 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800476 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000477 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000478 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800479 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000480 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000481 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800482 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000483 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000484 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800485 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000486 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000487 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800488 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000489 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000490 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800491 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000492 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000493 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800494 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000495 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000496 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800497 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000498 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000499 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800500 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000501 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000502 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800503 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000504 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000505 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800506 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000507 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000508 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800509 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000510 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000511 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800512 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000513 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000514 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800515 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000516 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000517 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800518 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000520 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800521 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000522 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000523 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800524 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000525 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000526 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800527 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000528 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800529 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800530 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800531 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000532#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000533 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800534 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000535 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000536#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000537 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800538 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000539 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000540 if (ctxt != NULL)
541 ctxt->errNo = error;
David Kilzer4472c3a2016-05-13 15:13:17 +0800542 if (info == NULL) {
543 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
544 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
545 errmsg);
546 } else {
547 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
548 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
549 errmsg, info);
550 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000551 if (ctxt != NULL) {
552 ctxt->wellFormed = 0;
553 if (ctxt->recovery == 0)
554 ctxt->disableSAX = 1;
555 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000556}
557
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000558/**
559 * xmlFatalErrMsg:
560 * @ctxt: an XML parser context
561 * @error: the error number
562 * @msg: the error message
563 *
564 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
565 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800566static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000567xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
568 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000569{
Daniel Veillard157fee02003-10-31 10:36:03 +0000570 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
571 (ctxt->instate == XML_PARSER_EOF))
572 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000573 if (ctxt != NULL)
574 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000575 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200576 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000577 if (ctxt != NULL) {
578 ctxt->wellFormed = 0;
579 if (ctxt->recovery == 0)
580 ctxt->disableSAX = 1;
581 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000582}
583
584/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000585 * xmlWarningMsg:
586 * @ctxt: an XML parser context
587 * @error: the error number
588 * @msg: the error message
589 * @str1: extra data
590 * @str2: extra data
591 *
592 * Handle a warning.
593 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800594static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000595xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
596 const char *msg, const xmlChar *str1, const xmlChar *str2)
597{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000598 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000599
Daniel Veillard157fee02003-10-31 10:36:03 +0000600 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
601 (ctxt->instate == XML_PARSER_EOF))
602 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000603 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
604 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000605 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200606 if (ctxt != NULL) {
607 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000608 (ctxt->sax) ? ctxt->sax->warning : NULL,
609 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000610 ctxt, NULL, XML_FROM_PARSER, error,
611 XML_ERR_WARNING, NULL, 0,
612 (const char *) str1, (const char *) str2, NULL, 0, 0,
613 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200614 } else {
615 __xmlRaiseError(schannel, NULL, NULL,
616 ctxt, NULL, XML_FROM_PARSER, error,
617 XML_ERR_WARNING, NULL, 0,
618 (const char *) str1, (const char *) str2, NULL, 0, 0,
619 msg, (const char *) str1, (const char *) str2);
620 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000621}
622
623/**
624 * xmlValidityError:
625 * @ctxt: an XML parser context
626 * @error: the error number
627 * @msg: the error message
628 * @str1: extra data
629 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000630 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000631 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800632static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000633xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000634 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000635{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000636 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000637
638 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
639 (ctxt->instate == XML_PARSER_EOF))
640 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000641 if (ctxt != NULL) {
642 ctxt->errNo = error;
643 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
644 schannel = ctxt->sax->serror;
645 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200646 if (ctxt != NULL) {
647 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000648 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000649 ctxt, NULL, XML_FROM_DTD, error,
650 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000651 (const char *) str2, NULL, 0, 0,
652 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000653 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200654 } else {
655 __xmlRaiseError(schannel, NULL, NULL,
656 ctxt, NULL, XML_FROM_DTD, error,
657 XML_ERR_ERROR, NULL, 0, (const char *) str1,
658 (const char *) str2, NULL, 0, 0,
659 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000660 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000661}
662
663/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000664 * xmlFatalErrMsgInt:
665 * @ctxt: an XML parser context
666 * @error: the error number
667 * @msg: the error message
668 * @val: an integer value
669 *
670 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
671 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800672static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000673xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000674 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000675{
Daniel Veillard157fee02003-10-31 10:36:03 +0000676 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
677 (ctxt->instate == XML_PARSER_EOF))
678 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000679 if (ctxt != NULL)
680 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000681 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000682 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
683 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000684 if (ctxt != NULL) {
685 ctxt->wellFormed = 0;
686 if (ctxt->recovery == 0)
687 ctxt->disableSAX = 1;
688 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000689}
690
691/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000692 * xmlFatalErrMsgStrIntStr:
693 * @ctxt: an XML parser context
694 * @error: the error number
695 * @msg: the error message
696 * @str1: an string info
697 * @val: an integer value
698 * @str2: an string info
699 *
700 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
701 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800702static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000703xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800704 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000705 const xmlChar *str2)
706{
Daniel Veillard157fee02003-10-31 10:36:03 +0000707 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
708 (ctxt->instate == XML_PARSER_EOF))
709 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000710 if (ctxt != NULL)
711 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000712 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000713 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
714 NULL, 0, (const char *) str1, (const char *) str2,
715 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000716 if (ctxt != NULL) {
717 ctxt->wellFormed = 0;
718 if (ctxt->recovery == 0)
719 ctxt->disableSAX = 1;
720 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000721}
722
723/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000724 * xmlFatalErrMsgStr:
725 * @ctxt: an XML parser context
726 * @error: the error number
727 * @msg: the error message
728 * @val: a string value
729 *
730 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
731 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800732static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000733xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000734 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000735{
Daniel Veillard157fee02003-10-31 10:36:03 +0000736 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
737 (ctxt->instate == XML_PARSER_EOF))
738 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000739 if (ctxt != NULL)
740 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000741 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000742 XML_FROM_PARSER, error, XML_ERR_FATAL,
743 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
744 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000745 if (ctxt != NULL) {
746 ctxt->wellFormed = 0;
747 if (ctxt->recovery == 0)
748 ctxt->disableSAX = 1;
749 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000750}
751
752/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000753 * xmlErrMsgStr:
754 * @ctxt: an XML parser context
755 * @error: the error number
756 * @msg: the error message
757 * @val: a string value
758 *
759 * Handle a non fatal parser error
760 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800761static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000762xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
763 const char *msg, const xmlChar * val)
764{
Daniel Veillard157fee02003-10-31 10:36:03 +0000765 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
766 (ctxt->instate == XML_PARSER_EOF))
767 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000768 if (ctxt != NULL)
769 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000770 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000771 XML_FROM_PARSER, error, XML_ERR_ERROR,
772 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
773 val);
774}
775
776/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000777 * xmlNsErr:
778 * @ctxt: an XML parser context
779 * @error: the error number
780 * @msg: the message
781 * @info1: extra information string
782 * @info2: extra information string
783 *
784 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
785 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800786static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000787xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
788 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000789 const xmlChar * info1, const xmlChar * info2,
790 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000791{
Daniel Veillard157fee02003-10-31 10:36:03 +0000792 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
793 (ctxt->instate == XML_PARSER_EOF))
794 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000795 if (ctxt != NULL)
796 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000797 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000798 XML_ERR_ERROR, NULL, 0, (const char *) info1,
799 (const char *) info2, (const char *) info3, 0, 0, msg,
800 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000801 if (ctxt != NULL)
802 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000803}
804
Daniel Veillard37334572008-07-31 08:20:02 +0000805/**
806 * xmlNsWarn
807 * @ctxt: an XML parser context
808 * @error: the error number
809 * @msg: the message
810 * @info1: extra information string
811 * @info2: extra information string
812 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800813 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000814 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800815static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard37334572008-07-31 08:20:02 +0000816xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
817 const char *msg,
818 const xmlChar * info1, const xmlChar * info2,
819 const xmlChar * info3)
820{
821 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
822 (ctxt->instate == XML_PARSER_EOF))
823 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000824 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
825 XML_ERR_WARNING, NULL, 0, (const char *) info1,
826 (const char *) info2, (const char *) info3, 0, 0, msg,
827 info1, info2, info3);
828}
829
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000830/************************************************************************
831 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800832 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833 * *
834 ************************************************************************/
835
836/**
837 * xmlHasFeature:
838 * @feature: the feature to be examined
839 *
840 * Examines if the library has been compiled with a given feature.
841 *
842 * Returns a non-zero value if the feature exist, otherwise zero.
843 * Returns zero (0) if the feature does not exist or an unknown
844 * unknown feature is requested, non-zero otherwise.
845 */
846int
847xmlHasFeature(xmlFeature feature)
848{
849 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000850 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000851#ifdef LIBXML_THREAD_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000856 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000857#ifdef LIBXML_TREE_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000862 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000863#ifdef LIBXML_OUTPUT_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000868 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000869#ifdef LIBXML_PUSH_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000874 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000875#ifdef LIBXML_READER_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000880 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000881#ifdef LIBXML_PATTERN_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000886 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000887#ifdef LIBXML_WRITER_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000892 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000893#ifdef LIBXML_SAX1_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000898 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000899#ifdef LIBXML_FTP_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000904 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000905#ifdef LIBXML_HTTP_ENABLED
906 return(1);
907#else
908 return(0);
909#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000910 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000911#ifdef LIBXML_VALID_ENABLED
912 return(1);
913#else
914 return(0);
915#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000916 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000917#ifdef LIBXML_HTML_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000922 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000923#ifdef LIBXML_LEGACY_ENABLED
924 return(1);
925#else
926 return(0);
927#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000928 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000929#ifdef LIBXML_C14N_ENABLED
930 return(1);
931#else
932 return(0);
933#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000934 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000935#ifdef LIBXML_CATALOG_ENABLED
936 return(1);
937#else
938 return(0);
939#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000940 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000941#ifdef LIBXML_XPATH_ENABLED
942 return(1);
943#else
944 return(0);
945#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000946 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000947#ifdef LIBXML_XPTR_ENABLED
948 return(1);
949#else
950 return(0);
951#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000952 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000953#ifdef LIBXML_XINCLUDE_ENABLED
954 return(1);
955#else
956 return(0);
957#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000958 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000959#ifdef LIBXML_ICONV_ENABLED
960 return(1);
961#else
962 return(0);
963#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000964 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000965#ifdef LIBXML_ISO8859X_ENABLED
966 return(1);
967#else
968 return(0);
969#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000970 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000971#ifdef LIBXML_UNICODE_ENABLED
972 return(1);
973#else
974 return(0);
975#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000976 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000977#ifdef LIBXML_REGEXP_ENABLED
978 return(1);
979#else
980 return(0);
981#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000982 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000983#ifdef LIBXML_AUTOMATA_ENABLED
984 return(1);
985#else
986 return(0);
987#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000988 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000989#ifdef LIBXML_EXPR_ENABLED
990 return(1);
991#else
992 return(0);
993#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000994 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000995#ifdef LIBXML_SCHEMAS_ENABLED
996 return(1);
997#else
998 return(0);
999#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001000 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001001#ifdef LIBXML_SCHEMATRON_ENABLED
1002 return(1);
1003#else
1004 return(0);
1005#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001006 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001007#ifdef LIBXML_MODULES_ENABLED
1008 return(1);
1009#else
1010 return(0);
1011#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001012 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001013#ifdef LIBXML_DEBUG_ENABLED
1014 return(1);
1015#else
1016 return(0);
1017#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001018 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001019#ifdef DEBUG_MEMORY_LOCATION
1020 return(1);
1021#else
1022 return(0);
1023#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001024 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001025#ifdef LIBXML_DEBUG_RUNTIME
1026 return(1);
1027#else
1028 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001029#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001030 case XML_WITH_ZLIB:
1031#ifdef LIBXML_ZLIB_ENABLED
1032 return(1);
1033#else
1034 return(0);
1035#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001036 case XML_WITH_LZMA:
1037#ifdef LIBXML_LZMA_ENABLED
1038 return(1);
1039#else
1040 return(0);
1041#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001042 case XML_WITH_ICU:
1043#ifdef LIBXML_ICU_ENABLED
1044 return(1);
1045#else
1046 return(0);
1047#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001048 default:
1049 break;
1050 }
1051 return(0);
1052}
1053
1054/************************************************************************
1055 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001056 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001057 * *
1058 ************************************************************************/
1059
1060/**
1061 * xmlDetectSAX2:
1062 * @ctxt: an XML parser context
1063 *
1064 * Do the SAX2 detection and specific intialization
1065 */
1066static void
1067xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1068 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001069#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001070 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1071 ((ctxt->sax->startElementNs != NULL) ||
1072 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001073#else
1074 ctxt->sax2 = 1;
1075#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001076
1077 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1078 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1079 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001080 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1081 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001082 xmlErrMemory(ctxt, NULL);
1083 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001084}
1085
Daniel Veillarde57ec792003-09-10 10:50:59 +00001086typedef struct _xmlDefAttrs xmlDefAttrs;
1087typedef xmlDefAttrs *xmlDefAttrsPtr;
1088struct _xmlDefAttrs {
1089 int nbAttrs; /* number of defaulted attributes on that element */
1090 int maxAttrs; /* the size of the array */
Nick Wellnhofer47496722017-05-31 16:46:39 +02001091#if __STDC_VERSION__ >= 199901L
1092 /* Using a C99 flexible array member avoids UBSan errors. */
1093 const xmlChar *values[]; /* array of localname/prefix/values/external */
1094#else
1095 const xmlChar *values[5];
1096#endif
Daniel Veillarde57ec792003-09-10 10:50:59 +00001097};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001098
1099/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001100 * xmlAttrNormalizeSpace:
1101 * @src: the source string
1102 * @dst: the target string
1103 *
1104 * Normalize the space in non CDATA attribute values:
1105 * If the attribute type is not CDATA, then the XML processor MUST further
1106 * process the normalized attribute value by discarding any leading and
1107 * trailing space (#x20) characters, and by replacing sequences of space
1108 * (#x20) characters by a single space (#x20) character.
1109 * Note that the size of dst need to be at least src, and if one doesn't need
1110 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1111 * passing src as dst is just fine.
1112 *
1113 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1114 * is needed.
1115 */
1116static xmlChar *
1117xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1118{
1119 if ((src == NULL) || (dst == NULL))
1120 return(NULL);
1121
1122 while (*src == 0x20) src++;
1123 while (*src != 0) {
1124 if (*src == 0x20) {
1125 while (*src == 0x20) src++;
1126 if (*src != 0)
1127 *dst++ = 0x20;
1128 } else {
1129 *dst++ = *src++;
1130 }
1131 }
1132 *dst = 0;
1133 if (dst == src)
1134 return(NULL);
1135 return(dst);
1136}
1137
1138/**
1139 * xmlAttrNormalizeSpace2:
1140 * @src: the source string
1141 *
1142 * Normalize the space in non CDATA attribute values, a slightly more complex
1143 * front end to avoid allocation problems when running on attribute values
1144 * coming from the input.
1145 *
1146 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1147 * is needed.
1148 */
1149static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001150xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001151{
1152 int i;
1153 int remove_head = 0;
1154 int need_realloc = 0;
1155 const xmlChar *cur;
1156
1157 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1158 return(NULL);
1159 i = *len;
1160 if (i <= 0)
1161 return(NULL);
1162
1163 cur = src;
1164 while (*cur == 0x20) {
1165 cur++;
1166 remove_head++;
1167 }
1168 while (*cur != 0) {
1169 if (*cur == 0x20) {
1170 cur++;
1171 if ((*cur == 0x20) || (*cur == 0)) {
1172 need_realloc = 1;
1173 break;
1174 }
1175 } else
1176 cur++;
1177 }
1178 if (need_realloc) {
1179 xmlChar *ret;
1180
1181 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1182 if (ret == NULL) {
1183 xmlErrMemory(ctxt, NULL);
1184 return(NULL);
1185 }
1186 xmlAttrNormalizeSpace(ret, ret);
1187 *len = (int) strlen((const char *)ret);
1188 return(ret);
1189 } else if (remove_head) {
1190 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001191 memmove(src, src + remove_head, 1 + *len);
1192 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001193 }
1194 return(NULL);
1195}
1196
1197/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001198 * xmlAddDefAttrs:
1199 * @ctxt: an XML parser context
1200 * @fullname: the element fullname
1201 * @fullattr: the attribute fullname
1202 * @value: the attribute value
1203 *
1204 * Add a defaulted attribute for an element
1205 */
1206static void
1207xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1208 const xmlChar *fullname,
1209 const xmlChar *fullattr,
1210 const xmlChar *value) {
1211 xmlDefAttrsPtr defaults;
1212 int len;
1213 const xmlChar *name;
1214 const xmlChar *prefix;
1215
Daniel Veillard6a31b832008-03-26 14:06:44 +00001216 /*
1217 * Allows to detect attribute redefinitions
1218 */
1219 if (ctxt->attsSpecial != NULL) {
1220 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1221 return;
1222 }
1223
Daniel Veillarde57ec792003-09-10 10:50:59 +00001224 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001225 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001226 if (ctxt->attsDefault == NULL)
1227 goto mem_error;
1228 }
1229
1230 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001231 * split the element name into prefix:localname , the string found
1232 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001233 */
1234 name = xmlSplitQName3(fullname, &len);
1235 if (name == NULL) {
1236 name = xmlDictLookup(ctxt->dict, fullname, -1);
1237 prefix = NULL;
1238 } else {
1239 name = xmlDictLookup(ctxt->dict, name, -1);
1240 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1241 }
1242
1243 /*
1244 * make sure there is some storage
1245 */
1246 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1247 if (defaults == NULL) {
1248 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001249 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001250 if (defaults == NULL)
1251 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001252 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001253 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001254 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1255 defaults, NULL) < 0) {
1256 xmlFree(defaults);
1257 goto mem_error;
1258 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001259 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001260 xmlDefAttrsPtr temp;
1261
1262 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001263 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001264 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001265 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001266 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001267 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001268 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1269 defaults, NULL) < 0) {
1270 xmlFree(defaults);
1271 goto mem_error;
1272 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001273 }
1274
1275 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001276 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001277 * are within the DTD and hen not associated to namespace names.
1278 */
1279 name = xmlSplitQName3(fullattr, &len);
1280 if (name == NULL) {
1281 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1282 prefix = NULL;
1283 } else {
1284 name = xmlDictLookup(ctxt->dict, name, -1);
1285 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1286 }
1287
Daniel Veillardae0765b2008-07-31 19:54:59 +00001288 defaults->values[5 * defaults->nbAttrs] = name;
1289 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001290 /* intern the string and precompute the end */
1291 len = xmlStrlen(value);
1292 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001293 defaults->values[5 * defaults->nbAttrs + 2] = value;
1294 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1295 if (ctxt->external)
1296 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1297 else
1298 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001299 defaults->nbAttrs++;
1300
1301 return;
1302
1303mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001304 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001305 return;
1306}
1307
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001308/**
1309 * xmlAddSpecialAttr:
1310 * @ctxt: an XML parser context
1311 * @fullname: the element fullname
1312 * @fullattr: the attribute fullname
1313 * @type: the attribute type
1314 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001315 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001316 */
1317static void
1318xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1319 const xmlChar *fullname,
1320 const xmlChar *fullattr,
1321 int type)
1322{
1323 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001324 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001325 if (ctxt->attsSpecial == NULL)
1326 goto mem_error;
1327 }
1328
Daniel Veillardac4118d2008-01-11 05:27:32 +00001329 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1330 return;
1331
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001332 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1333 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001334 return;
1335
1336mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001337 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001338 return;
1339}
1340
Daniel Veillard4432df22003-09-28 18:58:27 +00001341/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001342 * xmlCleanSpecialAttrCallback:
1343 *
1344 * Removes CDATA attributes from the special attribute table
1345 */
1346static void
1347xmlCleanSpecialAttrCallback(void *payload, void *data,
1348 const xmlChar *fullname, const xmlChar *fullattr,
1349 const xmlChar *unused ATTRIBUTE_UNUSED) {
1350 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1351
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001352 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001353 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1354 }
1355}
1356
1357/**
1358 * xmlCleanSpecialAttr:
1359 * @ctxt: an XML parser context
1360 *
1361 * Trim the list of attributes defined to remove all those of type
1362 * CDATA as they are not special. This call should be done when finishing
1363 * to parse the DTD and before starting to parse the document root.
1364 */
1365static void
1366xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1367{
1368 if (ctxt->attsSpecial == NULL)
1369 return;
1370
1371 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1372
1373 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1374 xmlHashFree(ctxt->attsSpecial, NULL);
1375 ctxt->attsSpecial = NULL;
1376 }
1377 return;
1378}
1379
1380/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001381 * xmlCheckLanguageID:
1382 * @lang: pointer to the string value
1383 *
1384 * Checks that the value conforms to the LanguageID production:
1385 *
1386 * NOTE: this is somewhat deprecated, those productions were removed from
1387 * the XML Second edition.
1388 *
1389 * [33] LanguageID ::= Langcode ('-' Subcode)*
1390 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1391 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1392 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1393 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1394 * [38] Subcode ::= ([a-z] | [A-Z])+
1395 *
 * The current REC references the successors of RFC 1766, currently RFC 5646
1397 *
1398 * http://www.rfc-editor.org/rfc/rfc5646.txt
1399 * langtag = language
1400 * ["-" script]
1401 * ["-" region]
1402 * *("-" variant)
1403 * *("-" extension)
1404 * ["-" privateuse]
1405 * language = 2*3ALPHA ; shortest ISO 639 code
1406 * ["-" extlang] ; sometimes followed by
1407 * ; extended language subtags
1408 * / 4ALPHA ; or reserved for future use
1409 * / 5*8ALPHA ; or registered language subtag
1410 *
1411 * extlang = 3ALPHA ; selected ISO 639 codes
1412 * *2("-" 3ALPHA) ; permanently reserved
1413 *
1414 * script = 4ALPHA ; ISO 15924 code
1415 *
1416 * region = 2ALPHA ; ISO 3166-1 code
1417 * / 3DIGIT ; UN M.49 code
1418 *
1419 * variant = 5*8alphanum ; registered variants
1420 * / (DIGIT 3alphanum)
1421 *
1422 * extension = singleton 1*("-" (2*8alphanum))
1423 *
1424 * ; Single alphanumerics
1425 * ; "x" reserved for private use
1426 * singleton = DIGIT ; 0 - 9
1427 * / %x41-57 ; A - W
1428 * / %x59-5A ; Y - Z
1429 * / %x61-77 ; a - w
1430 * / %x79-7A ; y - z
1431 *
1432 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1433 * The parser below doesn't try to cope with extension or privateuse
1434 * that could be added but that's not interoperable anyway
1435 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001436 * Returns 1 if correct 0 otherwise
1437 **/
1438int
1439xmlCheckLanguageID(const xmlChar * lang)
1440{
Daniel Veillard60587d62010-11-04 15:16:27 +01001441 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001442
1443 if (cur == NULL)
1444 return (0);
1445 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001446 ((cur[0] == 'I') && (cur[1] == '-')) ||
1447 ((cur[0] == 'x') && (cur[1] == '-')) ||
1448 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001449 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001450 * Still allow IANA code and user code which were coming
1451 * from the previous version of the XML-1.0 specification
1452 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001453 */
1454 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001455 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001456 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1457 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001458 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001459 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001460 nxt = cur;
1461 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1462 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1463 nxt++;
1464 if (nxt - cur >= 4) {
1465 /*
1466 * Reserved
1467 */
1468 if ((nxt - cur > 8) || (nxt[0] != 0))
1469 return(0);
1470 return(1);
1471 }
1472 if (nxt - cur < 2)
1473 return(0);
1474 /* we got an ISO 639 code */
1475 if (nxt[0] == 0)
1476 return(1);
1477 if (nxt[0] != '-')
1478 return(0);
1479
1480 nxt++;
1481 cur = nxt;
1482 /* now we can have extlang or script or region or variant */
1483 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1484 goto region_m49;
1485
1486 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1487 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1488 nxt++;
1489 if (nxt - cur == 4)
1490 goto script;
1491 if (nxt - cur == 2)
1492 goto region;
1493 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1494 goto variant;
1495 if (nxt - cur != 3)
1496 return(0);
1497 /* we parsed an extlang */
1498 if (nxt[0] == 0)
1499 return(1);
1500 if (nxt[0] != '-')
1501 return(0);
1502
1503 nxt++;
1504 cur = nxt;
1505 /* now we can have script or region or variant */
1506 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1507 goto region_m49;
1508
1509 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1510 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1511 nxt++;
1512 if (nxt - cur == 2)
1513 goto region;
1514 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1515 goto variant;
1516 if (nxt - cur != 4)
1517 return(0);
1518 /* we parsed a script */
1519script:
1520 if (nxt[0] == 0)
1521 return(1);
1522 if (nxt[0] != '-')
1523 return(0);
1524
1525 nxt++;
1526 cur = nxt;
1527 /* now we can have region or variant */
1528 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1529 goto region_m49;
1530
1531 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1532 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1533 nxt++;
1534
1535 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1536 goto variant;
1537 if (nxt - cur != 2)
1538 return(0);
1539 /* we parsed a region */
1540region:
1541 if (nxt[0] == 0)
1542 return(1);
1543 if (nxt[0] != '-')
1544 return(0);
1545
1546 nxt++;
1547 cur = nxt;
1548 /* now we can just have a variant */
1549 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1550 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1551 nxt++;
1552
1553 if ((nxt - cur < 5) || (nxt - cur > 8))
1554 return(0);
1555
1556 /* we parsed a variant */
1557variant:
1558 if (nxt[0] == 0)
1559 return(1);
1560 if (nxt[0] != '-')
1561 return(0);
1562 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001563 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001564
1565region_m49:
1566 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1567 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1568 nxt += 3;
1569 goto region;
1570 }
1571 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001572}
1573
Owen Taylor3473f882001-02-23 17:55:21 +00001574/************************************************************************
1575 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001576 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001577 * *
1578 ************************************************************************/
1579
/*
 * Forward declaration of a file-local routine, needed because it is
 * referenced before its body; presumably defined later in this file.
 */
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                            const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001582
Daniel Veillard0fb18932003-09-07 09:14:37 +00001583#ifdef SAX2
1584/**
1585 * nsPush:
1586 * @ctxt: an XML parser context
1587 * @prefix: the namespace prefix or NULL
1588 * @URL: the namespace name
1589 *
1590 * Pushes a new parser namespace on top of the ns stack
1591 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001592 * Returns -1 in case of error, -2 if the namespace should be discarded
1593 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001594 */
1595static int
1596nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1597{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001598 if (ctxt->options & XML_PARSE_NSCLEAN) {
1599 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001600 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001601 if (ctxt->nsTab[i] == prefix) {
1602 /* in scope */
1603 if (ctxt->nsTab[i + 1] == URL)
1604 return(-2);
1605 /* out of scope keep it */
1606 break;
1607 }
1608 }
1609 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001610 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1611 ctxt->nsMax = 10;
1612 ctxt->nsNr = 0;
1613 ctxt->nsTab = (const xmlChar **)
1614 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1615 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001616 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001617 ctxt->nsMax = 0;
1618 return (-1);
1619 }
1620 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001621 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001622 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001623 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1624 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1625 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001626 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001627 ctxt->nsMax /= 2;
1628 return (-1);
1629 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001630 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001631 }
1632 ctxt->nsTab[ctxt->nsNr++] = prefix;
1633 ctxt->nsTab[ctxt->nsNr++] = URL;
1634 return (ctxt->nsNr);
1635}
1636/**
1637 * nsPop:
1638 * @ctxt: an XML parser context
1639 * @nr: the number to pop
1640 *
1641 * Pops the top @nr parser prefix/namespace from the ns stack
1642 *
1643 * Returns the number of namespaces removed
1644 */
1645static int
1646nsPop(xmlParserCtxtPtr ctxt, int nr)
1647{
1648 int i;
1649
1650 if (ctxt->nsTab == NULL) return(0);
1651 if (ctxt->nsNr < nr) {
1652 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1653 nr = ctxt->nsNr;
1654 }
1655 if (ctxt->nsNr <= 0)
1656 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001657
Daniel Veillard0fb18932003-09-07 09:14:37 +00001658 for (i = 0;i < nr;i++) {
1659 ctxt->nsNr--;
1660 ctxt->nsTab[ctxt->nsNr] = NULL;
1661 }
1662 return(nr);
1663}
1664#endif
1665
1666static int
1667xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1668 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001669 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001670 int maxatts;
1671
1672 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001673 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001674 atts = (const xmlChar **)
1675 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001676 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001677 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001678 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1679 if (attallocs == NULL) goto mem_error;
1680 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001681 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001682 } else if (nr + 5 > ctxt->maxatts) {
1683 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001684 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1685 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001686 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001687 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001688 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1689 (maxatts / 5) * sizeof(int));
1690 if (attallocs == NULL) goto mem_error;
1691 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001692 ctxt->maxatts = maxatts;
1693 }
1694 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001695mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001696 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001697 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001698}
1699
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001700/**
1701 * inputPush:
1702 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001703 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001704 *
1705 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001706 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001707 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001708 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001709int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001710inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1711{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001712 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001713 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001714 if (ctxt->inputNr >= ctxt->inputMax) {
1715 ctxt->inputMax *= 2;
1716 ctxt->inputTab =
1717 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1718 ctxt->inputMax *
1719 sizeof(ctxt->inputTab[0]));
1720 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001721 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001722 xmlFreeInputStream(value);
1723 ctxt->inputMax /= 2;
1724 value = NULL;
1725 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001726 }
1727 }
1728 ctxt->inputTab[ctxt->inputNr] = value;
1729 ctxt->input = value;
1730 return (ctxt->inputNr++);
1731}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001732/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001733 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001734 * @ctxt: an XML parser context
1735 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001736 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001737 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001738 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001739 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001740xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001741inputPop(xmlParserCtxtPtr ctxt)
1742{
1743 xmlParserInputPtr ret;
1744
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001745 if (ctxt == NULL)
1746 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001747 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001748 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001749 ctxt->inputNr--;
1750 if (ctxt->inputNr > 0)
1751 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1752 else
1753 ctxt->input = NULL;
1754 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001755 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001756 return (ret);
1757}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001758/**
1759 * nodePush:
1760 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001761 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001762 *
1763 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001764 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001765 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001766 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001767int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001768nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1769{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001770 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001771 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001772 xmlNodePtr *tmp;
1773
1774 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1775 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001776 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001777 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001778 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001779 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001780 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001781 ctxt->nodeTab = tmp;
1782 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001783 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001784 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1785 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001786 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001787 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001788 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001789 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001790 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001791 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001792 ctxt->nodeTab[ctxt->nodeNr] = value;
1793 ctxt->node = value;
1794 return (ctxt->nodeNr++);
1795}
Daniel Veillard8915c152008-08-26 13:05:34 +00001796
Daniel Veillard1c732d22002-11-30 11:22:59 +00001797/**
1798 * nodePop:
1799 * @ctxt: an XML parser context
1800 *
1801 * Pops the top element node from the node stack
1802 *
1803 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001804 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001805xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001806nodePop(xmlParserCtxtPtr ctxt)
1807{
1808 xmlNodePtr ret;
1809
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001810 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001811 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001812 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001813 ctxt->nodeNr--;
1814 if (ctxt->nodeNr > 0)
1815 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1816 else
1817 ctxt->node = NULL;
1818 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001819 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001820 return (ret);
1821}
Daniel Veillarda2351322004-06-27 12:08:10 +00001822
1823#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001824/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001825 * nameNsPush:
1826 * @ctxt: an XML parser context
1827 * @value: the element name
1828 * @prefix: the element prefix
1829 * @URI: the element namespace name
1830 *
1831 * Pushes a new element name/prefix/URL on top of the name stack
1832 *
1833 * Returns -1 in case of error, the index in the stack otherwise
1834 */
1835static int
1836nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1837 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1838{
1839 if (ctxt->nameNr >= ctxt->nameMax) {
1840 const xmlChar * *tmp;
1841 void **tmp2;
1842 ctxt->nameMax *= 2;
1843 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1844 ctxt->nameMax *
1845 sizeof(ctxt->nameTab[0]));
1846 if (tmp == NULL) {
1847 ctxt->nameMax /= 2;
1848 goto mem_error;
1849 }
1850 ctxt->nameTab = tmp;
1851 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1852 ctxt->nameMax * 3 *
1853 sizeof(ctxt->pushTab[0]));
1854 if (tmp2 == NULL) {
1855 ctxt->nameMax /= 2;
1856 goto mem_error;
1857 }
1858 ctxt->pushTab = tmp2;
1859 }
1860 ctxt->nameTab[ctxt->nameNr] = value;
1861 ctxt->name = value;
1862 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1863 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001864 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001865 return (ctxt->nameNr++);
1866mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001867 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001868 return (-1);
1869}
1870/**
1871 * nameNsPop:
1872 * @ctxt: an XML parser context
1873 *
1874 * Pops the top element/prefix/URI name from the name stack
1875 *
1876 * Returns the name just removed
1877 */
1878static const xmlChar *
1879nameNsPop(xmlParserCtxtPtr ctxt)
1880{
1881 const xmlChar *ret;
1882
1883 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001884 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001885 ctxt->nameNr--;
1886 if (ctxt->nameNr > 0)
1887 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1888 else
1889 ctxt->name = NULL;
1890 ret = ctxt->nameTab[ctxt->nameNr];
1891 ctxt->nameTab[ctxt->nameNr] = NULL;
1892 return (ret);
1893}
Daniel Veillarda2351322004-06-27 12:08:10 +00001894#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001895
1896/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001897 * namePush:
1898 * @ctxt: an XML parser context
1899 * @value: the element name
1900 *
1901 * Pushes a new element name on top of the name stack
1902 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001903 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001904 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001905int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001906namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001907{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001908 if (ctxt == NULL) return (-1);
1909
Daniel Veillard1c732d22002-11-30 11:22:59 +00001910 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001911 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001912 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001913 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001914 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001915 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001916 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001917 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001918 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001919 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001920 }
1921 ctxt->nameTab[ctxt->nameNr] = value;
1922 ctxt->name = value;
1923 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001924mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001925 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001926 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001927}
1928/**
1929 * namePop:
1930 * @ctxt: an XML parser context
1931 *
1932 * Pops the top element name from the name stack
1933 *
1934 * Returns the name just removed
1935 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001936const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001937namePop(xmlParserCtxtPtr ctxt)
1938{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001939 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001940
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001941 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1942 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001943 ctxt->nameNr--;
1944 if (ctxt->nameNr > 0)
1945 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1946 else
1947 ctxt->name = NULL;
1948 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001949 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001950 return (ret);
1951}
Owen Taylor3473f882001-02-23 17:55:21 +00001952
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001953static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001954 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001955 int *tmp;
1956
Owen Taylor3473f882001-02-23 17:55:21 +00001957 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001958 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1959 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1960 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001961 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001962 ctxt->spaceMax /=2;
1963 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001964 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001965 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001966 }
1967 ctxt->spaceTab[ctxt->spaceNr] = val;
1968 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1969 return(ctxt->spaceNr++);
1970}
1971
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001972static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001973 int ret;
1974 if (ctxt->spaceNr <= 0) return(0);
1975 ctxt->spaceNr--;
1976 if (ctxt->spaceNr > 0)
1977 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1978 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001979 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001980 ret = ctxt->spaceTab[ctxt->spaceNr];
1981 ctxt->spaceTab[ctxt->spaceNr] = -1;
1982 return(ret);
1983}
1984
1985/*
1986 * Macros for accessing the content. Those should be used only by the parser,
1987 * and not exported.
1988 *
1989 * Dirty macros, i.e. one often need to make assumption on the context to
1990 * use them
1991 *
1992 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1993 * To be used with extreme caution since operations consuming
1994 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
1999 * RAW same as CUR but in the input buffer, bypass any token
2000 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare against ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00002007 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2008 *
2009 * NEXT Skip to the next character, this does the proper decoding
2010 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00002011 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00002012 * CUR_CHAR(l) returns the current unicode character (int), set l
2013 * to the number of xmlChars used for the encoding [0-5].
2014 * CUR_SCHAR same but operate on a string instead of the context
2015 * COPY_BUF copy the current unicode char to the target buffer, increment
2016 * the index
2017 * GROW, SHRINK handling of input buffers
2018 */
2019
/*
 * Raw accessors on the current input of the parser context. They all
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes found at s are
 * c1 ... cn. Bytes are compared left to right and the comparison stops at
 * the first mismatch, so a zero terminated buffer is never read past its
 * terminator as long as none of the c values is 0. Every parameter is
 * parenthesized so that any expression can be passed, and the buffer is
 * only ever read, hence the const qualified cast.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2043
/*
 * SKIP(val): advance the current input by val bytes and add val to the
 * column and to ctxt->nbChars. The line counter is not touched, so this
 * is only suitable for text known not to contain a newline. More input is
 * requested when the new position holds a 0 byte.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): same as SKIP, one byte at a time, keeping line and column
 * in sync when a newline is crossed. val is parenthesized so that any
 * expression can be passed; it is evaluated on every iteration and must
 * not have side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2062
Daniel Veillarda880b122003-04-21 21:36:41 +00002063#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002064 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2065 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002066 xmlSHRINK (ctxt);
2067
2068static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2069 xmlParserInputShrink(ctxt->input);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002070 if (*ctxt->input->cur == 0)
2071 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2072}
Owen Taylor3473f882001-02-23 17:55:21 +00002073
Daniel Veillarda880b122003-04-21 21:36:41 +00002074#define GROW if ((ctxt->progressive == 0) && \
2075 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002076 xmlGROW (ctxt);
2077
2078static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002079 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2080 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2081
2082 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2083 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002084 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002085 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2086 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002087 xmlHaltParser(ctxt);
2088 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002089 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002090 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002091 if ((ctxt->input->cur > ctxt->input->end) ||
2092 (ctxt->input->cur < ctxt->input->base)) {
2093 xmlHaltParser(ctxt);
2094 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2095 return;
2096 }
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002097 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2098 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillarda880b122003-04-21 21:36:41 +00002099}
Owen Taylor3473f882001-02-23 17:55:21 +00002100
/* Shorthands, all expecting a variable named ctxt in scope. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte, bumping the column and
 * ctxt->nbChars but not the line counter; requests more input when the
 * new position holds a 0 byte. This is a bare compound statement, not a
 * do/while wrapper.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, l bytes long, keeping line
 * and column in sync. Neither ctxt->nbChars nor the input buffer is
 * touched.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* l must name a variable: it receives the length of the character read */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store the character v at b[i] and advance i. A
 * character of length 1 is stored as a single xmlChar, anything else goes
 * through xmlCopyCharMultiByte(). The parameters are parenthesized so
 * that expressions can be passed safely; i must be a modifiable lvalue.
 * The expansion is a bare if/else statement, as it has always been.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002126
2127/**
2128 * xmlSkipBlankChars:
2129 * @ctxt: the XML parser context
2130 *
2131 * skip all blanks character found at that point in the input streams.
2132 * It pops up finished entities in the process if allowable at that point.
2133 *
2134 * Returns the number of space chars skipped
2135 */
2136
2137int
2138xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002139 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002140
2141 /*
2142 * It's Okay to use CUR/NEXT here since all the blanks are on
2143 * the ASCII range.
2144 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002145 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2146 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002147 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002148 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002149 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002150 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002151 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002152 if (*cur == '\n') {
2153 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002154 } else {
2155 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002156 }
2157 cur++;
2158 res++;
2159 if (*cur == 0) {
2160 ctxt->input->cur = cur;
2161 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2162 cur = ctxt->input->cur;
2163 }
2164 }
2165 ctxt->input->cur = cur;
2166 } else {
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002167 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2168
2169 while (1) {
2170 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002171 NEXT;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002172 } else if (CUR == '%') {
2173 /*
2174 * Need to handle support of entities branching here
2175 */
2176 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2177 break;
2178 xmlParsePEReference(ctxt);
2179 } else if (CUR == 0) {
2180 if (ctxt->inputNr <= 1)
2181 break;
2182 xmlPopInput(ctxt);
2183 } else {
2184 break;
2185 }
Nick Wellnhofer872fea92017-06-19 00:24:12 +02002186
2187 /*
2188 * Also increase the counter when entering or exiting a PERef.
2189 * The spec says: "When a parameter-entity reference is recognized
2190 * in the DTD and included, its replacement text MUST be enlarged
2191 * by the attachment of one leading and one following space (#x20)
2192 * character."
2193 */
2194 res++;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002195 }
Daniel Veillard02141ea2001-04-30 11:46:40 +00002196 }
Owen Taylor3473f882001-02-23 17:55:21 +00002197 return(res);
2198}
2199
2200/************************************************************************
2201 * *
2202 * Commodity functions to handle entities *
2203 * *
2204 ************************************************************************/
2205
2206/**
2207 * xmlPopInput:
2208 * @ctxt: an XML parser context
2209 *
2210 * xmlPopInput: the current input pointed by ctxt->input came to an end
2211 * pop it and return the next char.
2212 *
2213 * Returns the current xmlChar in the parser context
2214 */
2215xmlChar
2216xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002217 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002218 if (xmlParserDebugEntities)
2219 xmlGenericError(xmlGenericErrorContext,
2220 "Popping input %d\n", ctxt->inputNr);
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02002221 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2222 (ctxt->instate != XML_PARSER_EOF))
2223 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2224 "Unfinished entity outside the DTD");
Owen Taylor3473f882001-02-23 17:55:21 +00002225 xmlFreeInputStream(inputPop(ctxt));
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002226 if (*ctxt->input->cur == 0)
2227 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00002228 return(CUR);
2229}
2230
2231/**
2232 * xmlPushInput:
2233 * @ctxt: an XML parser context
2234 * @input: an XML parser input fragment (entity, XML fragment ...).
2235 *
2236 * xmlPushInput: switch to a new input stream which is stacked on top
2237 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002238 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002239 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002240int
Owen Taylor3473f882001-02-23 17:55:21 +00002241xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002242 int ret;
2243 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002244
2245 if (xmlParserDebugEntities) {
2246 if ((ctxt->input != NULL) && (ctxt->input->filename))
2247 xmlGenericError(xmlGenericErrorContext,
2248 "%s(%d): ", ctxt->input->filename,
2249 ctxt->input->line);
2250 xmlGenericError(xmlGenericErrorContext,
2251 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2252 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002253 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002254 if (ctxt->instate == XML_PARSER_EOF)
2255 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002256 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002257 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002258}
2259
2260/**
2261 * xmlParseCharRef:
2262 * @ctxt: an XML parser context
2263 *
2264 * parse Reference declarations
2265 *
2266 * [66] CharRef ::= '&#' [0-9]+ ';' |
2267 * '&#x' [0-9a-fA-F]+ ';'
2268 *
2269 * [ WFC: Legal Character ]
2270 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002271 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002272 *
2273 * Returns the value parsed (as an int), 0 in case of error
2274 */
2275int
2276xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002277 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002278 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002279 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002280
Owen Taylor3473f882001-02-23 17:55:21 +00002281 /*
2282 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2283 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002284 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002285 (NXT(2) == 'x')) {
2286 SKIP(3);
2287 GROW;
2288 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002289 if (count++ > 20) {
2290 count = 0;
2291 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002292 if (ctxt->instate == XML_PARSER_EOF)
2293 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002294 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002295 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002296 val = val * 16 + (CUR - '0');
2297 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2298 val = val * 16 + (CUR - 'a') + 10;
2299 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2300 val = val * 16 + (CUR - 'A') + 10;
2301 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002302 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002303 val = 0;
2304 break;
2305 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002306 if (val > 0x10FFFF)
2307 outofrange = val;
2308
Owen Taylor3473f882001-02-23 17:55:21 +00002309 NEXT;
2310 count++;
2311 }
2312 if (RAW == ';') {
2313 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002314 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002315 ctxt->nbChars ++;
2316 ctxt->input->cur++;
2317 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002318 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002319 SKIP(2);
2320 GROW;
2321 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002322 if (count++ > 20) {
2323 count = 0;
2324 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002325 if (ctxt->instate == XML_PARSER_EOF)
2326 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002327 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002328 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002329 val = val * 10 + (CUR - '0');
2330 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002331 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002332 val = 0;
2333 break;
2334 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002335 if (val > 0x10FFFF)
2336 outofrange = val;
2337
Owen Taylor3473f882001-02-23 17:55:21 +00002338 NEXT;
2339 count++;
2340 }
2341 if (RAW == ';') {
2342 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002343 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002344 ctxt->nbChars ++;
2345 ctxt->input->cur++;
2346 }
2347 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002348 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002349 }
2350
2351 /*
2352 * [ WFC: Legal Character ]
2353 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002354 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002355 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002356 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002357 return(val);
2358 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002359 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2360 "xmlParseCharRef: invalid xmlChar value %d\n",
2361 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002362 }
2363 return(0);
2364}
2365
2366/**
2367 * xmlParseStringCharRef:
2368 * @ctxt: an XML parser context
2369 * @str: a pointer to an index in the string
2370 *
2371 * parse Reference declarations, variant parsing from a string rather
2372 * than an an input flow.
2373 *
2374 * [66] CharRef ::= '&#' [0-9]+ ';' |
2375 * '&#x' [0-9a-fA-F]+ ';'
2376 *
2377 * [ WFC: Legal Character ]
2378 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002379 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002380 *
2381 * Returns the value parsed (as an int), 0 in case of error, str will be
2382 * updated to the current value of the index
2383 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002384static int
Owen Taylor3473f882001-02-23 17:55:21 +00002385xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2386 const xmlChar *ptr;
2387 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002388 unsigned int val = 0;
2389 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002390
2391 if ((str == NULL) || (*str == NULL)) return(0);
2392 ptr = *str;
2393 cur = *ptr;
2394 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2395 ptr += 3;
2396 cur = *ptr;
2397 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002398 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002399 val = val * 16 + (cur - '0');
2400 else if ((cur >= 'a') && (cur <= 'f'))
2401 val = val * 16 + (cur - 'a') + 10;
2402 else if ((cur >= 'A') && (cur <= 'F'))
2403 val = val * 16 + (cur - 'A') + 10;
2404 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002405 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002406 val = 0;
2407 break;
2408 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002409 if (val > 0x10FFFF)
2410 outofrange = val;
2411
Owen Taylor3473f882001-02-23 17:55:21 +00002412 ptr++;
2413 cur = *ptr;
2414 }
2415 if (cur == ';')
2416 ptr++;
2417 } else if ((cur == '&') && (ptr[1] == '#')){
2418 ptr += 2;
2419 cur = *ptr;
2420 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002421 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002422 val = val * 10 + (cur - '0');
2423 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002424 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002425 val = 0;
2426 break;
2427 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002428 if (val > 0x10FFFF)
2429 outofrange = val;
2430
Owen Taylor3473f882001-02-23 17:55:21 +00002431 ptr++;
2432 cur = *ptr;
2433 }
2434 if (cur == ';')
2435 ptr++;
2436 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002437 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002438 return(0);
2439 }
2440 *str = ptr;
2441
2442 /*
2443 * [ WFC: Legal Character ]
2444 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002445 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002446 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002447 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002448 return(val);
2449 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002450 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2451 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2452 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002453 }
2454 return(0);
2455}
2456
2457/**
2458 * xmlParserHandlePEReference:
2459 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002460 *
Owen Taylor3473f882001-02-23 17:55:21 +00002461 * [69] PEReference ::= '%' Name ';'
2462 *
2463 * [ WFC: No Recursion ]
2464 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002465 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002466 *
2467 * [ WFC: Entity Declared ]
2468 * In a document without any DTD, a document with only an internal DTD
2469 * subset which contains no parameter entity references, or a document
2470 * with "standalone='yes'", ... ... The declaration of a parameter
2471 * entity must precede any reference to it...
2472 *
2473 * [ VC: Entity Declared ]
2474 * In a document with an external subset or external parameter entities
2475 * with "standalone='no'", ... ... The declaration of a parameter entity
2476 * must precede any reference to it...
2477 *
2478 * [ WFC: In DTD ]
2479 * Parameter-entity references may only appear in the DTD.
2480 * NOTE: misleading but this is handled.
2481 *
2482 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002483 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002484 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002485 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002486 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002487 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002488 */
2489void
2490xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00002491 switch(ctxt->instate) {
2492 case XML_PARSER_CDATA_SECTION:
2493 return;
2494 case XML_PARSER_COMMENT:
2495 return;
2496 case XML_PARSER_START_TAG:
2497 return;
2498 case XML_PARSER_END_TAG:
2499 return;
2500 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002501 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002502 return;
2503 case XML_PARSER_PROLOG:
2504 case XML_PARSER_START:
2505 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002506 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002507 return;
2508 case XML_PARSER_ENTITY_DECL:
2509 case XML_PARSER_CONTENT:
2510 case XML_PARSER_ATTRIBUTE_VALUE:
2511 case XML_PARSER_PI:
2512 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002513 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002514 /* we just ignore it there */
2515 return;
2516 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002517 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002518 return;
2519 case XML_PARSER_ENTITY_VALUE:
2520 /*
2521 * NOTE: in the case of entity values, we don't do the
2522 * substitution here since we need the literal
2523 * entity value to be able to save the internal
2524 * subset of the document.
2525 * This will be handled by xmlStringDecodeEntities
2526 */
2527 return;
2528 case XML_PARSER_DTD:
2529 /*
2530 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2531 * In the internal DTD subset, parameter-entity references
2532 * can occur only where markup declarations can occur, not
2533 * within markup declarations.
2534 * In that case this is handled in xmlParseMarkupDecl
2535 */
2536 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2537 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002538 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002539 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002540 break;
2541 case XML_PARSER_IGNORE:
2542 return;
2543 }
2544
Nick Wellnhofer03904152017-06-05 21:16:00 +02002545 xmlParsePEReference(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00002546}
2547
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current one plus @n.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures, it is
 *            jumped to when the new size wraps around or when the
 *            reallocation fails (buffer is left untouched in both cases).
 * @n is parenthesized so that any expression can be passed safely.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2562
2563/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002564 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002565 * @ctxt: the parser context
2566 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002567 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002568 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2569 * @end: an end marker xmlChar, 0 if none
2570 * @end2: an end marker xmlChar, 0 if none
2571 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002572 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002573 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002574 *
2575 * [67] Reference ::= EntityRef | CharRef
2576 *
2577 * [69] PEReference ::= '%' Name ';'
2578 *
2579 * Returns A newly allocated string with the substitution done. The caller
2580 * must deallocate it !
2581 */
2582xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002583xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2584 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002585 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002586 size_t buffer_size = 0;
2587 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002588
2589 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002590 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002591 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002592 xmlEntityPtr ent;
2593 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002594
Daniel Veillarda82b1822004-11-08 16:24:57 +00002595 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002596 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002597 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002598
Daniel Veillard0161e632008-08-28 15:36:32 +00002599 if (((ctxt->depth > 40) &&
2600 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2601 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002602 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002603 return(NULL);
2604 }
2605
2606 /*
2607 * allocate a translation buffer.
2608 */
2609 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002610 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002611 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002612
2613 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002614 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002615 * we are operating on already parsed values.
2616 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002617 if (str < last)
2618 c = CUR_SCHAR(str, l);
2619 else
2620 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002621 while ((c != 0) && (c != end) && /* non input consuming loop */
2622 (c != end2) && (c != end3)) {
2623
2624 if (c == 0) break;
2625 if ((c == '&') && (str[1] == '#')) {
2626 int val = xmlParseStringCharRef(ctxt, &str);
2627 if (val != 0) {
2628 COPY_BUF(0,buffer,nbchars,val);
2629 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002630 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002631 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002632 }
Owen Taylor3473f882001-02-23 17:55:21 +00002633 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2634 if (xmlParserDebugEntities)
2635 xmlGenericError(xmlGenericErrorContext,
2636 "String decoding Entity Reference: %.30s\n",
2637 str);
2638 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002639 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2640 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002641 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002642 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002643 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002644 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002645 if ((ent != NULL) &&
2646 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2647 if (ent->content != NULL) {
2648 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002649 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002650 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002651 }
Owen Taylor3473f882001-02-23 17:55:21 +00002652 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002653 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2654 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002655 }
2656 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002657 ctxt->depth++;
2658 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2659 0, 0, 0);
2660 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002661
David Drysdale69030712015-11-20 11:13:45 +08002662 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2663 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2664 goto int_error;
2665
Owen Taylor3473f882001-02-23 17:55:21 +00002666 if (rep != NULL) {
2667 current = rep;
2668 while (*current != 0) { /* non input consuming loop */
2669 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002670 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002671 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002672 goto int_error;
2673 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002674 }
2675 }
2676 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002677 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002678 }
2679 } else if (ent != NULL) {
2680 int i = xmlStrlen(ent->name);
2681 const xmlChar *cur = ent->name;
2682
2683 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002684 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002685 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002686 }
2687 for (;i > 0;i--)
2688 buffer[nbchars++] = *cur++;
2689 buffer[nbchars++] = ';';
2690 }
2691 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2692 if (xmlParserDebugEntities)
2693 xmlGenericError(xmlGenericErrorContext,
2694 "String decoding PE Reference: %.30s\n", str);
2695 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002696 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2697 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002698 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002699 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002700 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002701 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002702 if (ent->content == NULL) {
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002703 /*
2704 * Note: external parsed entities will not be loaded,
2705 * it is not required for a non-validating parser to
2706 * complete external PEreferences coming from the
2707 * internal subset
2708 */
2709 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2710 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2711 (ctxt->validate != 0)) {
2712 xmlLoadEntityContent(ctxt, ent);
2713 } else {
2714 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2715 "not validating will not read content for PE entity %s\n",
2716 ent->name, NULL);
2717 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002718 }
Owen Taylor3473f882001-02-23 17:55:21 +00002719 ctxt->depth++;
2720 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2721 0, 0, 0);
2722 ctxt->depth--;
2723 if (rep != NULL) {
2724 current = rep;
2725 while (*current != 0) { /* non input consuming loop */
2726 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002727 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002728 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002729 goto int_error;
2730 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002731 }
2732 }
2733 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002734 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002735 }
2736 }
2737 } else {
2738 COPY_BUF(l,buffer,nbchars,c);
2739 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002740 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2741 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002742 }
2743 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002744 if (str < last)
2745 c = CUR_SCHAR(str, l);
2746 else
2747 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002748 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002749 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002750 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002751
2752mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002753 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002754int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002755 if (rep != NULL)
2756 xmlFree(rep);
2757 if (buffer != NULL)
2758 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002759 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002760}
2761
Daniel Veillarde57ec792003-09-10 10:50:59 +00002762/**
2763 * xmlStringDecodeEntities:
2764 * @ctxt: the parser context
2765 * @str: the input string
2766 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2767 * @end: an end marker xmlChar, 0 if none
2768 * @end2: an end marker xmlChar, 0 if none
2769 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002770 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002771 * Takes a entity string content and process to do the adequate substitutions.
2772 *
2773 * [67] Reference ::= EntityRef | CharRef
2774 *
2775 * [69] PEReference ::= '%' Name ';'
2776 *
2777 * Returns A newly allocated string with the substitution done. The caller
2778 * must deallocate it !
2779 */
2780xmlChar *
2781xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2782 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002783 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002784 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2785 end, end2, end3));
2786}
Owen Taylor3473f882001-02-23 17:55:21 +00002787
2788/************************************************************************
2789 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002790 * Commodity functions, cleanup needed ? *
2791 * *
2792 ************************************************************************/
2793
2794/**
2795 * areBlanks:
2796 * @ctxt: an XML parser context
2797 * @str: a xmlChar *
2798 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002799 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002800 *
2801 * Is this a sequence of blank chars that one can ignore ?
2802 *
2803 * Returns 1 if ignorable 0 otherwise.
2804 */
2805
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002806static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2807 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002808 int i, ret;
2809 xmlNodePtr lastChild;
2810
Daniel Veillard05c13a22001-09-09 08:38:09 +00002811 /*
2812 * Don't spend time trying to differentiate them, the same callback is
2813 * used !
2814 */
2815 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002816 return(0);
2817
Owen Taylor3473f882001-02-23 17:55:21 +00002818 /*
2819 * Check for xml:space value.
2820 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002821 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2822 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002823 return(0);
2824
2825 /*
2826 * Check that the string is made of blanks
2827 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002828 if (blank_chars == 0) {
2829 for (i = 0;i < len;i++)
2830 if (!(IS_BLANK_CH(str[i]))) return(0);
2831 }
Owen Taylor3473f882001-02-23 17:55:21 +00002832
2833 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002834 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002835 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002836 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002837 if (ctxt->myDoc != NULL) {
2838 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2839 if (ret == 0) return(1);
2840 if (ret == 1) return(0);
2841 }
2842
2843 /*
2844 * Otherwise, heuristic :-\
2845 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002846 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002847 if ((ctxt->node->children == NULL) &&
2848 (RAW == '<') && (NXT(1) == '/')) return(0);
2849
2850 lastChild = xmlGetLastChild(ctxt->node);
2851 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002852 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2853 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002854 } else if (xmlNodeIsText(lastChild))
2855 return(0);
2856 else if ((ctxt->node->children != NULL) &&
2857 (xmlNodeIsText(ctxt->node->children)))
2858 return(0);
2859 return(1);
2860}
2861
Owen Taylor3473f882001-02-23 17:55:21 +00002862/************************************************************************
2863 * *
2864 * Extra stuff for namespace support *
2865 * Relates to http://www.w3.org/TR/WD-xml-names *
2866 * *
2867 ************************************************************************/
2868
2869/**
2870 * xmlSplitQName:
2871 * @ctxt: an XML parser context
2872 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002873 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002874 *
2875 * parse an UTF8 encoded XML qualified name string
2876 *
2877 * [NS 5] QName ::= (Prefix ':')? LocalPart
2878 *
2879 * [NS 6] Prefix ::= NCName
2880 *
2881 * [NS 7] LocalPart ::= NCName
2882 *
2883 * Returns the local part, and prefix is updated
2884 * to get the Prefix if any.
2885 */
2886
2887xmlChar *
2888xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2889 xmlChar buf[XML_MAX_NAMELEN + 5];
2890 xmlChar *buffer = NULL;
2891 int len = 0;
2892 int max = XML_MAX_NAMELEN;
2893 xmlChar *ret = NULL;
2894 const xmlChar *cur = name;
2895 int c;
2896
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002897 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002898 *prefix = NULL;
2899
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002900 if (cur == NULL) return(NULL);
2901
Owen Taylor3473f882001-02-23 17:55:21 +00002902#ifndef XML_XML_NAMESPACE
2903 /* xml: prefix is not really a namespace */
2904 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2905 (cur[2] == 'l') && (cur[3] == ':'))
2906 return(xmlStrdup(name));
2907#endif
2908
Daniel Veillard597bc482003-07-24 16:08:28 +00002909 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002910 if (cur[0] == ':')
2911 return(xmlStrdup(name));
2912
2913 c = *cur++;
2914 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2915 buf[len++] = c;
2916 c = *cur++;
2917 }
2918 if (len >= max) {
2919 /*
2920 * Okay someone managed to make a huge name, so he's ready to pay
2921 * for the processing speed.
2922 */
2923 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002924
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002925 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002926 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002927 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002928 return(NULL);
2929 }
2930 memcpy(buffer, buf, len);
2931 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2932 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002933 xmlChar *tmp;
2934
Owen Taylor3473f882001-02-23 17:55:21 +00002935 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002936 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002937 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002938 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002939 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002940 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002941 return(NULL);
2942 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002943 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002944 }
2945 buffer[len++] = c;
2946 c = *cur++;
2947 }
2948 buffer[len] = 0;
2949 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002950
Daniel Veillard597bc482003-07-24 16:08:28 +00002951 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002952 if (buffer != NULL)
2953 xmlFree(buffer);
2954 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002955 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002956 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002957
Owen Taylor3473f882001-02-23 17:55:21 +00002958 if (buffer == NULL)
2959 ret = xmlStrndup(buf, len);
2960 else {
2961 ret = buffer;
2962 buffer = NULL;
2963 max = XML_MAX_NAMELEN;
2964 }
2965
2966
2967 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002968 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002969 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002970 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002971 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002972 }
Owen Taylor3473f882001-02-23 17:55:21 +00002973 len = 0;
2974
Daniel Veillardbb284f42002-10-16 18:02:47 +00002975 /*
2976 * Check that the first character is proper to start
2977 * a new name
2978 */
2979 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2980 ((c >= 0x41) && (c <= 0x5A)) ||
2981 (c == '_') || (c == ':'))) {
2982 int l;
2983 int first = CUR_SCHAR(cur, l);
2984
2985 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002986 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002987 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002988 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002989 }
2990 }
2991 cur++;
2992
Owen Taylor3473f882001-02-23 17:55:21 +00002993 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2994 buf[len++] = c;
2995 c = *cur++;
2996 }
2997 if (len >= max) {
2998 /*
2999 * Okay someone managed to make a huge name, so he's ready to pay
3000 * for the processing speed.
3001 */
3002 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003003
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003004 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003005 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003006 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003007 return(NULL);
3008 }
3009 memcpy(buffer, buf, len);
3010 while (c != 0) { /* tested bigname2.xml */
3011 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003012 xmlChar *tmp;
3013
Owen Taylor3473f882001-02-23 17:55:21 +00003014 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003015 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003016 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003017 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003018 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003019 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003020 return(NULL);
3021 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003022 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003023 }
3024 buffer[len++] = c;
3025 c = *cur++;
3026 }
3027 buffer[len] = 0;
3028 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003029
Owen Taylor3473f882001-02-23 17:55:21 +00003030 if (buffer == NULL)
3031 ret = xmlStrndup(buf, len);
3032 else {
3033 ret = buffer;
3034 }
3035 }
3036
3037 return(ret);
3038}
3039
3040/************************************************************************
3041 * *
3042 * The parser itself *
3043 * Relates to http://www.w3.org/TR/REC-xml *
3044 * *
3045 ************************************************************************/
3046
Daniel Veillard34e3f642008-07-29 09:02:27 +00003047/************************************************************************
3048 * *
3049 * Routines to parse Name, NCName and NmToken *
3050 * *
3051 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, only compiled in DEBUG builds. Each one is incremented
 * on entry to the parsing routine it is named after.
 */
static unsigned long nbParseName = 0;          /* xmlParseName */
static unsigned long nbParseNmToken = 0;       /* xmlParseNmtoken */
static unsigned long nbParseNCName = 0;        /* xmlParseNCName */
static unsigned long nbParseNCNameComplex = 0; /* xmlParseNCNameComplex */
static unsigned long nbParseNameComplex = 0;   /* xmlParseNameComplex */
static unsigned long nbParseStringName = 0;    /* xmlParseStringName */
#endif
3060
/*
 * The two following functions implement the change of accepted
 * characters for Name and NmToken made in Revision 5 of XML-1.0.
 * They correspond to the modified production [4] and the new production
 * [4a] of that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore for those checks.
 * We still keep compatibility with the pre-revision 5 parsing semantics
 * if the XML_PARSE_OLD10 option is given to the parser.
 */
3071static int
3072xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3073 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3074 /*
3075 * Use the new checks of production [4] [4a] amd [5] of the
3076 * Update 5 of XML-1.0
3077 */
3078 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3079 (((c >= 'a') && (c <= 'z')) ||
3080 ((c >= 'A') && (c <= 'Z')) ||
3081 (c == '_') || (c == ':') ||
3082 ((c >= 0xC0) && (c <= 0xD6)) ||
3083 ((c >= 0xD8) && (c <= 0xF6)) ||
3084 ((c >= 0xF8) && (c <= 0x2FF)) ||
3085 ((c >= 0x370) && (c <= 0x37D)) ||
3086 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3087 ((c >= 0x200C) && (c <= 0x200D)) ||
3088 ((c >= 0x2070) && (c <= 0x218F)) ||
3089 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3090 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3091 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3092 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3093 ((c >= 0x10000) && (c <= 0xEFFFF))))
3094 return(1);
3095 } else {
3096 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3097 return(1);
3098 }
3099 return(0);
3100}
3101
3102static int
3103xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3104 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3105 /*
3106 * Use the new checks of production [4] [4a] amd [5] of the
3107 * Update 5 of XML-1.0
3108 */
3109 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3110 (((c >= 'a') && (c <= 'z')) ||
3111 ((c >= 'A') && (c <= 'Z')) ||
3112 ((c >= '0') && (c <= '9')) || /* !start */
3113 (c == '_') || (c == ':') ||
3114 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3115 ((c >= 0xC0) && (c <= 0xD6)) ||
3116 ((c >= 0xD8) && (c <= 0xF6)) ||
3117 ((c >= 0xF8) && (c <= 0x2FF)) ||
3118 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3119 ((c >= 0x370) && (c <= 0x37D)) ||
3120 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3121 ((c >= 0x200C) && (c <= 0x200D)) ||
3122 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3123 ((c >= 0x2070) && (c <= 0x218F)) ||
3124 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3125 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3126 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3127 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3128 ((c >= 0x10000) && (c <= 0xEFFFF))))
3129 return(1);
3130 } else {
3131 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3132 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003133 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003134 (IS_COMBINING(c)) ||
3135 (IS_EXTENDER(c)))
3136 return(1);
3137 }
3138 return(0);
3139}
3140
Daniel Veillarde57ec792003-09-10 10:50:59 +00003141static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003142 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003143
Daniel Veillard34e3f642008-07-29 09:02:27 +00003144static const xmlChar *
3145xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3146 int len = 0, l;
3147 int c;
3148 int count = 0;
3149
Daniel Veillardc6561462009-03-25 10:22:31 +00003150#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003151 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003152#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003153
3154 /*
3155 * Handler for more complex cases
3156 */
3157 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003158 if (ctxt->instate == XML_PARSER_EOF)
3159 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003160 c = CUR_CHAR(l);
3161 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3162 /*
3163 * Use the new checks of production [4] [4a] amd [5] of the
3164 * Update 5 of XML-1.0
3165 */
3166 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3167 (!(((c >= 'a') && (c <= 'z')) ||
3168 ((c >= 'A') && (c <= 'Z')) ||
3169 (c == '_') || (c == ':') ||
3170 ((c >= 0xC0) && (c <= 0xD6)) ||
3171 ((c >= 0xD8) && (c <= 0xF6)) ||
3172 ((c >= 0xF8) && (c <= 0x2FF)) ||
3173 ((c >= 0x370) && (c <= 0x37D)) ||
3174 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3175 ((c >= 0x200C) && (c <= 0x200D)) ||
3176 ((c >= 0x2070) && (c <= 0x218F)) ||
3177 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3178 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3179 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3180 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3181 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3182 return(NULL);
3183 }
3184 len += l;
3185 NEXTL(l);
3186 c = CUR_CHAR(l);
3187 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3188 (((c >= 'a') && (c <= 'z')) ||
3189 ((c >= 'A') && (c <= 'Z')) ||
3190 ((c >= '0') && (c <= '9')) || /* !start */
3191 (c == '_') || (c == ':') ||
3192 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3193 ((c >= 0xC0) && (c <= 0xD6)) ||
3194 ((c >= 0xD8) && (c <= 0xF6)) ||
3195 ((c >= 0xF8) && (c <= 0x2FF)) ||
3196 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3197 ((c >= 0x370) && (c <= 0x37D)) ||
3198 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3199 ((c >= 0x200C) && (c <= 0x200D)) ||
3200 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3201 ((c >= 0x2070) && (c <= 0x218F)) ||
3202 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3203 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3204 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3205 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3206 ((c >= 0x10000) && (c <= 0xEFFFF))
3207 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003208 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003209 count = 0;
3210 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003211 if (ctxt->instate == XML_PARSER_EOF)
3212 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003213 }
3214 len += l;
3215 NEXTL(l);
3216 c = CUR_CHAR(l);
3217 }
3218 } else {
3219 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3220 (!IS_LETTER(c) && (c != '_') &&
3221 (c != ':'))) {
3222 return(NULL);
3223 }
3224 len += l;
3225 NEXTL(l);
3226 c = CUR_CHAR(l);
3227
3228 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3229 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3230 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003231 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003232 (IS_COMBINING(c)) ||
3233 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003234 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003235 count = 0;
3236 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003237 if (ctxt->instate == XML_PARSER_EOF)
3238 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003239 }
3240 len += l;
3241 NEXTL(l);
3242 c = CUR_CHAR(l);
3243 }
3244 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003245 if ((len > XML_MAX_NAME_LENGTH) &&
3246 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3247 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3248 return(NULL);
3249 }
Nick Wellnhofere2663052017-06-05 15:37:17 +02003250 if (ctxt->input->cur - ctxt->input->base < len) {
3251 /*
3252 * There were a couple of bugs where PERefs lead to to a change
3253 * of the buffer. Check the buffer size to avoid passing an invalid
3254 * pointer to xmlDictLookup.
3255 */
3256 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3257 "unexpected change of input buffer");
3258 return (NULL);
3259 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003260 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3261 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3262 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3263}
3264
Owen Taylor3473f882001-02-23 17:55:21 +00003265/**
3266 * xmlParseName:
3267 * @ctxt: an XML parser context
3268 *
3269 * parse an XML name.
3270 *
3271 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3272 * CombiningChar | Extender
3273 *
3274 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3275 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003276 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003277 *
3278 * Returns the Name parsed or NULL
3279 */
3280
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003281const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003282xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003283 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003284 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003285 int count = 0;
3286
3287 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003288
Daniel Veillardc6561462009-03-25 10:22:31 +00003289#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003290 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003291#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003292
Daniel Veillard48b2f892001-02-25 16:11:03 +00003293 /*
3294 * Accelerator for simple ASCII names
3295 */
3296 in = ctxt->input->cur;
3297 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3298 ((*in >= 0x41) && (*in <= 0x5A)) ||
3299 (*in == '_') || (*in == ':')) {
3300 in++;
3301 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3302 ((*in >= 0x41) && (*in <= 0x5A)) ||
3303 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003304 (*in == '_') || (*in == '-') ||
3305 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003306 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003307 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003308 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003309 if ((count > XML_MAX_NAME_LENGTH) &&
3310 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3311 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3312 return(NULL);
3313 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003314 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003315 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003316 ctxt->nbChars += count;
3317 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003318 if (ret == NULL)
3319 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003320 return(ret);
3321 }
3322 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003323 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003324 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003325}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003326
Daniel Veillard34e3f642008-07-29 09:02:27 +00003327static const xmlChar *
3328xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3329 int len = 0, l;
3330 int c;
3331 int count = 0;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003332 size_t startPosition = 0;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003333
Daniel Veillardc6561462009-03-25 10:22:31 +00003334#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003335 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003336#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003337
3338 /*
3339 * Handler for more complex cases
3340 */
3341 GROW;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003342 startPosition = CUR_PTR - BASE_PTR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003343 c = CUR_CHAR(l);
3344 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3345 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3346 return(NULL);
3347 }
3348
3349 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3350 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003351 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003352 if ((len > XML_MAX_NAME_LENGTH) &&
3353 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3354 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3355 return(NULL);
3356 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003357 count = 0;
3358 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003359 if (ctxt->instate == XML_PARSER_EOF)
3360 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003361 }
3362 len += l;
3363 NEXTL(l);
3364 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003365 if (c == 0) {
3366 count = 0;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003367 /*
3368 * when shrinking to extend the buffer we really need to preserve
3369 * the part of the name we already parsed. Hence rolling back
3370 * by current lenght.
3371 */
3372 ctxt->input->cur -= l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003373 GROW;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003374 ctxt->input->cur += l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003375 if (ctxt->instate == XML_PARSER_EOF)
3376 return(NULL);
3377 c = CUR_CHAR(l);
3378 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003379 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003380 if ((len > XML_MAX_NAME_LENGTH) &&
3381 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3382 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3383 return(NULL);
3384 }
Pranjal Jumde45752d22016-03-03 11:50:34 -08003385 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003386}
3387
3388/**
3389 * xmlParseNCName:
3390 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003391 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003392 *
3393 * parse an XML name.
3394 *
3395 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3396 * CombiningChar | Extender
3397 *
3398 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3399 *
3400 * Returns the Name parsed or NULL
3401 */
3402
3403static const xmlChar *
3404xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003405 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003406 const xmlChar *ret;
3407 int count = 0;
3408
Daniel Veillardc6561462009-03-25 10:22:31 +00003409#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003410 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003411#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003412
3413 /*
3414 * Accelerator for simple ASCII names
3415 */
3416 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003417 e = ctxt->input->end;
3418 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3419 ((*in >= 0x41) && (*in <= 0x5A)) ||
3420 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003421 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003422 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3423 ((*in >= 0x41) && (*in <= 0x5A)) ||
3424 ((*in >= 0x30) && (*in <= 0x39)) ||
3425 (*in == '_') || (*in == '-') ||
3426 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003427 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003428 if (in >= e)
3429 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003430 if ((*in > 0) && (*in < 0x80)) {
3431 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003432 if ((count > XML_MAX_NAME_LENGTH) &&
3433 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3434 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3435 return(NULL);
3436 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003437 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3438 ctxt->input->cur = in;
3439 ctxt->nbChars += count;
3440 ctxt->input->col += count;
3441 if (ret == NULL) {
3442 xmlErrMemory(ctxt, NULL);
3443 }
3444 return(ret);
3445 }
3446 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003447complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003448 return(xmlParseNCNameComplex(ctxt));
3449}
3450
Daniel Veillard46de64e2002-05-29 08:21:33 +00003451/**
3452 * xmlParseNameAndCompare:
3453 * @ctxt: an XML parser context
3454 *
3455 * parse an XML name and compares for match
3456 * (specialized for endtag parsing)
3457 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003458 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3459 * and the name for mismatch
3460 */
3461
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003462static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003463xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003464 register const xmlChar *cmp = other;
3465 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003466 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003467
3468 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003469 if (ctxt->instate == XML_PARSER_EOF)
3470 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003471
Daniel Veillard46de64e2002-05-29 08:21:33 +00003472 in = ctxt->input->cur;
3473 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003474 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003475 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003476 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003477 }
William M. Brack76e95df2003-10-18 16:20:14 +00003478 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003479 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003480 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003481 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003482 }
3483 /* failure (or end of input buffer), check with full function */
3484 ret = xmlParseName (ctxt);
Jan Pokornýbb654fe2016-04-13 16:56:07 +02003485 /* strings coming from the dictionary direct compare possible */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003486 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003487 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003488 }
3489 return ret;
3490}
3491
Owen Taylor3473f882001-02-23 17:55:21 +00003492/**
3493 * xmlParseStringName:
3494 * @ctxt: an XML parser context
3495 * @str: a pointer to the string pointer (IN/OUT)
3496 *
3497 * parse an XML name.
3498 *
3499 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3500 * CombiningChar | Extender
3501 *
3502 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3503 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003504 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003505 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003506 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003507 * is updated to the current location in the string.
3508 */
3509
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003510static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003511xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3512 xmlChar buf[XML_MAX_NAMELEN + 5];
3513 const xmlChar *cur = *str;
3514 int len = 0, l;
3515 int c;
3516
Daniel Veillardc6561462009-03-25 10:22:31 +00003517#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003518 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003519#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003520
Owen Taylor3473f882001-02-23 17:55:21 +00003521 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003522 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003523 return(NULL);
3524 }
3525
Daniel Veillard34e3f642008-07-29 09:02:27 +00003526 COPY_BUF(l,buf,len,c);
3527 cur += l;
3528 c = CUR_SCHAR(cur, l);
3529 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003530 COPY_BUF(l,buf,len,c);
3531 cur += l;
3532 c = CUR_SCHAR(cur, l);
3533 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3534 /*
3535 * Okay someone managed to make a huge name, so he's ready to pay
3536 * for the processing speed.
3537 */
3538 xmlChar *buffer;
3539 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003540
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003541 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003542 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003543 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003544 return(NULL);
3545 }
3546 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003547 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003548 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003549 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003550
3551 if ((len > XML_MAX_NAME_LENGTH) &&
3552 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3553 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3554 xmlFree(buffer);
3555 return(NULL);
3556 }
Owen Taylor3473f882001-02-23 17:55:21 +00003557 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003558 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003559 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003560 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003561 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003562 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003563 return(NULL);
3564 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003565 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003566 }
3567 COPY_BUF(l,buffer,len,c);
3568 cur += l;
3569 c = CUR_SCHAR(cur, l);
3570 }
3571 buffer[len] = 0;
3572 *str = cur;
3573 return(buffer);
3574 }
3575 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003576 if ((len > XML_MAX_NAME_LENGTH) &&
3577 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3578 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3579 return(NULL);
3580 }
Owen Taylor3473f882001-02-23 17:55:21 +00003581 *str = cur;
3582 return(xmlStrndup(buf, len));
3583}
3584
3585/**
3586 * xmlParseNmtoken:
3587 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003588 *
Owen Taylor3473f882001-02-23 17:55:21 +00003589 * parse an XML Nmtoken.
3590 *
3591 * [7] Nmtoken ::= (NameChar)+
3592 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003593 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003594 *
3595 * Returns the Nmtoken parsed or NULL
3596 */
3597
3598xmlChar *
3599xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3600 xmlChar buf[XML_MAX_NAMELEN + 5];
3601 int len = 0, l;
3602 int c;
3603 int count = 0;
3604
Daniel Veillardc6561462009-03-25 10:22:31 +00003605#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003606 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003607#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003608
Owen Taylor3473f882001-02-23 17:55:21 +00003609 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003610 if (ctxt->instate == XML_PARSER_EOF)
3611 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003612 c = CUR_CHAR(l);
3613
Daniel Veillard34e3f642008-07-29 09:02:27 +00003614 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003615 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003616 count = 0;
3617 GROW;
3618 }
3619 COPY_BUF(l,buf,len,c);
3620 NEXTL(l);
3621 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003622 if (c == 0) {
3623 count = 0;
3624 GROW;
3625 if (ctxt->instate == XML_PARSER_EOF)
3626 return(NULL);
3627 c = CUR_CHAR(l);
3628 }
Owen Taylor3473f882001-02-23 17:55:21 +00003629 if (len >= XML_MAX_NAMELEN) {
3630 /*
3631 * Okay someone managed to make a huge token, so he's ready to pay
3632 * for the processing speed.
3633 */
3634 xmlChar *buffer;
3635 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003636
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003637 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003638 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003639 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003640 return(NULL);
3641 }
3642 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003643 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003644 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003645 count = 0;
3646 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003647 if (ctxt->instate == XML_PARSER_EOF) {
3648 xmlFree(buffer);
3649 return(NULL);
3650 }
Owen Taylor3473f882001-02-23 17:55:21 +00003651 }
3652 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003653 xmlChar *tmp;
3654
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003655 if ((max > XML_MAX_NAME_LENGTH) &&
3656 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3657 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3658 xmlFree(buffer);
3659 return(NULL);
3660 }
Owen Taylor3473f882001-02-23 17:55:21 +00003661 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003662 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003663 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003664 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003665 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003666 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003667 return(NULL);
3668 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003669 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003670 }
3671 COPY_BUF(l,buffer,len,c);
3672 NEXTL(l);
3673 c = CUR_CHAR(l);
3674 }
3675 buffer[len] = 0;
3676 return(buffer);
3677 }
3678 }
3679 if (len == 0)
3680 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003681 if ((len > XML_MAX_NAME_LENGTH) &&
3682 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3683 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3684 return(NULL);
3685 }
Owen Taylor3473f882001-02-23 17:55:21 +00003686 return(xmlStrndup(buf, len));
3687}
3688
3689/**
3690 * xmlParseEntityValue:
3691 * @ctxt: an XML parser context
3692 * @orig: if non-NULL store a copy of the original entity value
3693 *
3694 * parse a value for ENTITY declarations
3695 *
3696 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3697 * "'" ([^%&'] | PEReference | Reference)* "'"
3698 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003699 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003700 */
3701
3702xmlChar *
3703xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3704 xmlChar *buf = NULL;
3705 int len = 0;
3706 int size = XML_PARSER_BUFFER_SIZE;
3707 int c, l;
3708 xmlChar stop;
3709 xmlChar *ret = NULL;
3710 const xmlChar *cur = NULL;
3711 xmlParserInputPtr input;
3712
3713 if (RAW == '"') stop = '"';
3714 else if (RAW == '\'') stop = '\'';
3715 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003716 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003717 return(NULL);
3718 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003719 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003720 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003721 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003722 return(NULL);
3723 }
3724
3725 /*
3726 * The content of the entity definition is copied in a buffer.
3727 */
3728
3729 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3730 input = ctxt->input;
3731 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003732 if (ctxt->instate == XML_PARSER_EOF) {
3733 xmlFree(buf);
3734 return(NULL);
3735 }
Owen Taylor3473f882001-02-23 17:55:21 +00003736 NEXT;
3737 c = CUR_CHAR(l);
3738 /*
3739 * NOTE: 4.4.5 Included in Literal
3740 * When a parameter entity reference appears in a literal entity
3741 * value, ... a single or double quote character in the replacement
3742 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003743 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003744 * In practice it means we stop the loop only when back at parsing
3745 * the initial entity and the quote is found
3746 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003747 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3748 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003749 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003750 xmlChar *tmp;
3751
Owen Taylor3473f882001-02-23 17:55:21 +00003752 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003753 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3754 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003755 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003756 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003757 return(NULL);
3758 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003759 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003760 }
3761 COPY_BUF(l,buf,len,c);
3762 NEXTL(l);
Owen Taylor3473f882001-02-23 17:55:21 +00003763
3764 GROW;
3765 c = CUR_CHAR(l);
3766 if (c == 0) {
3767 GROW;
3768 c = CUR_CHAR(l);
3769 }
3770 }
3771 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003772 if (ctxt->instate == XML_PARSER_EOF) {
3773 xmlFree(buf);
3774 return(NULL);
3775 }
Owen Taylor3473f882001-02-23 17:55:21 +00003776
3777 /*
3778 * Raise problem w.r.t. '&' and '%' being used in non-entities
3779 * reference constructs. Note Charref will be handled in
3780 * xmlStringDecodeEntities()
3781 */
3782 cur = buf;
3783 while (*cur != 0) { /* non input consuming */
3784 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3785 xmlChar *name;
3786 xmlChar tmp = *cur;
3787
3788 cur++;
3789 name = xmlParseStringName(ctxt, &cur);
3790 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003791 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003792 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003793 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003794 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003795 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3796 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003797 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003798 }
3799 if (name != NULL)
3800 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003801 if (*cur == 0)
3802 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003803 }
3804 cur++;
3805 }
3806
3807 /*
3808 * Then PEReference entities are substituted.
3809 */
3810 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003811 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003812 xmlFree(buf);
3813 } else {
3814 NEXT;
3815 /*
3816 * NOTE: 4.4.7 Bypassed
3817 * When a general entity reference appears in the EntityValue in
3818 * an entity declaration, it is bypassed and left as is.
3819 * so XML_SUBSTITUTE_REF is not set here.
3820 */
Peter Simons8f30bdf2016-04-15 11:56:55 +02003821 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003822 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3823 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02003824 --ctxt->depth;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003825 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00003826 *orig = buf;
3827 else
3828 xmlFree(buf);
3829 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003830
Owen Taylor3473f882001-02-23 17:55:21 +00003831 return(ret);
3832}
3833
3834/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003835 * xmlParseAttValueComplex:
3836 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003837 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003838 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003839 *
3840 * parse a value for an attribute, this is the fallback function
3841 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003842 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003843 *
3844 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3845 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003846static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003847xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003848 xmlChar limit = 0;
3849 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003850 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003851 size_t len = 0;
3852 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003853 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003854 xmlChar *current = NULL;
3855 xmlEntityPtr ent;
3856
Owen Taylor3473f882001-02-23 17:55:21 +00003857 if (NXT(0) == '"') {
3858 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3859 limit = '"';
3860 NEXT;
3861 } else if (NXT(0) == '\'') {
3862 limit = '\'';
3863 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3864 NEXT;
3865 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003866 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003867 return(NULL);
3868 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003869
Owen Taylor3473f882001-02-23 17:55:21 +00003870 /*
3871 * allocate a translation buffer.
3872 */
3873 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003874 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003875 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003876
3877 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003878 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003879 */
3880 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003881 while (((NXT(0) != limit) && /* checked */
3882 (IS_CHAR(c)) && (c != '<')) &&
3883 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003884 /*
3885 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3886 * special option is given
3887 */
3888 if ((len > XML_MAX_TEXT_LENGTH) &&
3889 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3890 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003891 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003892 goto mem_error;
3893 }
Owen Taylor3473f882001-02-23 17:55:21 +00003894 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003895 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003896 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003897 if (NXT(1) == '#') {
3898 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003899
Owen Taylor3473f882001-02-23 17:55:21 +00003900 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003901 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003902 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003903 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003904 }
3905 buf[len++] = '&';
3906 } else {
3907 /*
3908 * The reparsing will be done in xmlStringGetNodeList()
3909 * called by the attribute() function in SAX.c
3910 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003911 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003912 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003913 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003914 buf[len++] = '&';
3915 buf[len++] = '#';
3916 buf[len++] = '3';
3917 buf[len++] = '8';
3918 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003919 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003920 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003921 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003922 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003923 }
Owen Taylor3473f882001-02-23 17:55:21 +00003924 len += xmlCopyChar(0, &buf[len], val);
3925 }
3926 } else {
3927 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003928 ctxt->nbentities++;
3929 if (ent != NULL)
3930 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003931 if ((ent != NULL) &&
3932 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003933 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003934 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003935 }
3936 if ((ctxt->replaceEntities == 0) &&
3937 (ent->content[0] == '&')) {
3938 buf[len++] = '&';
3939 buf[len++] = '#';
3940 buf[len++] = '3';
3941 buf[len++] = '8';
3942 buf[len++] = ';';
3943 } else {
3944 buf[len++] = ent->content[0];
3945 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003946 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003947 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003948 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
Peter Simons8f30bdf2016-04-15 11:56:55 +02003949 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003950 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003951 XML_SUBSTITUTE_REF,
3952 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02003953 --ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003954 if (rep != NULL) {
3955 current = rep;
3956 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003957 if ((*current == 0xD) || (*current == 0xA) ||
3958 (*current == 0x9)) {
3959 buf[len++] = 0x20;
3960 current++;
3961 } else
3962 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003963 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003964 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003965 }
3966 }
3967 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003968 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003969 }
3970 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003971 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003972 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003973 }
Owen Taylor3473f882001-02-23 17:55:21 +00003974 if (ent->content != NULL)
3975 buf[len++] = ent->content[0];
3976 }
3977 } else if (ent != NULL) {
3978 int i = xmlStrlen(ent->name);
3979 const xmlChar *cur = ent->name;
3980
3981 /*
3982 * This may look absurd but is needed to detect
3983 * entities problems
3984 */
3985 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08003986 (ent->content != NULL) && (ent->checked == 0)) {
3987 unsigned long oldnbent = ctxt->nbentities;
3988
Peter Simons8f30bdf2016-04-15 11:56:55 +02003989 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003990 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003991 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02003992 --ctxt->depth;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08003993
Daniel Veillardcff25462013-03-11 15:57:55 +08003994 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003995 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08003996 if (xmlStrchr(rep, '<'))
3997 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003998 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003999 rep = NULL;
4000 }
Owen Taylor3473f882001-02-23 17:55:21 +00004001 }
4002
4003 /*
4004 * Just output the reference
4005 */
4006 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004007 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004008 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004009 }
4010 for (;i > 0;i--)
4011 buf[len++] = *cur++;
4012 buf[len++] = ';';
4013 }
4014 }
4015 } else {
4016 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004017 if ((len != 0) || (!normalize)) {
4018 if ((!normalize) || (!in_space)) {
4019 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004020 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004021 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004022 }
4023 }
4024 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004025 }
4026 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004027 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004028 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004029 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004030 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004031 }
4032 }
4033 NEXTL(l);
4034 }
4035 GROW;
4036 c = CUR_CHAR(l);
4037 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004038 if (ctxt->instate == XML_PARSER_EOF)
4039 goto error;
4040
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004041 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004042 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004043 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004044 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004045 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004046 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004047 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004048 if ((c != 0) && (!IS_CHAR(c))) {
4049 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4050 "invalid character in attribute value\n");
4051 } else {
4052 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4053 "AttValue: ' expected\n");
4054 }
Owen Taylor3473f882001-02-23 17:55:21 +00004055 } else
4056 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004057
4058 /*
4059 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004060 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004061 */
4062 if (len >= INT_MAX) {
4063 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004064 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004065 goto mem_error;
4066 }
4067
4068 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004069 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004070
4071mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004072 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004073error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004074 if (buf != NULL)
4075 xmlFree(buf);
4076 if (rep != NULL)
4077 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004078 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004079}
4080
4081/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004082 * xmlParseAttValue:
4083 * @ctxt: an XML parser context
4084 *
4085 * parse a value for an attribute
4086 * Note: the parser won't do substitution of entities here, this
4087 * will be handled later in xmlStringGetNodeList
4088 *
4089 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4090 * "'" ([^<&'] | Reference)* "'"
4091 *
4092 * 3.3.3 Attribute-Value Normalization:
4093 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004094 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004095 * - a character reference is processed by appending the referenced
4096 * character to the attribute value
4097 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004098 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004099 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4100 * appending #x20 to the normalized value, except that only a single
4101 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004102 * parsed entity or the literal entity value of an internal parsed entity
4103 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004104 * If the declared value is not CDATA, then the XML processor must further
4105 * process the normalized attribute value by discarding any leading and
4106 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004107 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004108 * All attributes for which no declaration has been read should be treated
4109 * by a non-validating parser as if declared CDATA.
4110 *
4111 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4112 */
4113
4114
4115xmlChar *
4116xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004117 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004118 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004119}
4120
4121/**
Owen Taylor3473f882001-02-23 17:55:21 +00004122 * xmlParseSystemLiteral:
4123 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004124 *
Owen Taylor3473f882001-02-23 17:55:21 +00004125 * parse an XML Literal
4126 *
4127 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4128 *
4129 * Returns the SystemLiteral parsed or NULL
4130 */
4131
4132xmlChar *
4133xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4134 xmlChar *buf = NULL;
4135 int len = 0;
4136 int size = XML_PARSER_BUFFER_SIZE;
4137 int cur, l;
4138 xmlChar stop;
4139 int state = ctxt->instate;
4140 int count = 0;
4141
4142 SHRINK;
4143 if (RAW == '"') {
4144 NEXT;
4145 stop = '"';
4146 } else if (RAW == '\'') {
4147 NEXT;
4148 stop = '\'';
4149 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004150 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004151 return(NULL);
4152 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004153
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004154 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004155 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004156 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004157 return(NULL);
4158 }
4159 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4160 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004161 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004162 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004163 xmlChar *tmp;
4164
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004165 if ((size > XML_MAX_NAME_LENGTH) &&
4166 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4167 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4168 xmlFree(buf);
4169 ctxt->instate = (xmlParserInputState) state;
4170 return(NULL);
4171 }
Owen Taylor3473f882001-02-23 17:55:21 +00004172 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004173 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4174 if (tmp == NULL) {
4175 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004176 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004177 ctxt->instate = (xmlParserInputState) state;
4178 return(NULL);
4179 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004180 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004181 }
4182 count++;
4183 if (count > 50) {
4184 GROW;
4185 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004186 if (ctxt->instate == XML_PARSER_EOF) {
4187 xmlFree(buf);
4188 return(NULL);
4189 }
Owen Taylor3473f882001-02-23 17:55:21 +00004190 }
4191 COPY_BUF(l,buf,len,cur);
4192 NEXTL(l);
4193 cur = CUR_CHAR(l);
4194 if (cur == 0) {
4195 GROW;
4196 SHRINK;
4197 cur = CUR_CHAR(l);
4198 }
4199 }
4200 buf[len] = 0;
4201 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004202 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004203 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004204 } else {
4205 NEXT;
4206 }
4207 return(buf);
4208}
4209
4210/**
4211 * xmlParsePubidLiteral:
4212 * @ctxt: an XML parser context
4213 *
4214 * parse an XML public literal
4215 *
4216 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4217 *
4218 * Returns the PubidLiteral parsed or NULL.
4219 */
4220
4221xmlChar *
4222xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4223 xmlChar *buf = NULL;
4224 int len = 0;
4225 int size = XML_PARSER_BUFFER_SIZE;
4226 xmlChar cur;
4227 xmlChar stop;
4228 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004229 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004230
4231 SHRINK;
4232 if (RAW == '"') {
4233 NEXT;
4234 stop = '"';
4235 } else if (RAW == '\'') {
4236 NEXT;
4237 stop = '\'';
4238 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004239 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004240 return(NULL);
4241 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004242 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004243 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004244 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004245 return(NULL);
4246 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004247 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004248 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004249 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004250 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004251 xmlChar *tmp;
4252
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004253 if ((size > XML_MAX_NAME_LENGTH) &&
4254 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4255 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4256 xmlFree(buf);
4257 return(NULL);
4258 }
Owen Taylor3473f882001-02-23 17:55:21 +00004259 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004260 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4261 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004262 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004263 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004264 return(NULL);
4265 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004266 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004267 }
4268 buf[len++] = cur;
4269 count++;
4270 if (count > 50) {
4271 GROW;
4272 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004273 if (ctxt->instate == XML_PARSER_EOF) {
4274 xmlFree(buf);
4275 return(NULL);
4276 }
Owen Taylor3473f882001-02-23 17:55:21 +00004277 }
4278 NEXT;
4279 cur = CUR;
4280 if (cur == 0) {
4281 GROW;
4282 SHRINK;
4283 cur = CUR;
4284 }
4285 }
4286 buf[len] = 0;
4287 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004288 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004289 } else {
4290 NEXT;
4291 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004292 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004293 return(buf);
4294}
4295
Daniel Veillard8ed10722009-08-20 19:17:36 +02004296static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004297
/*
 * used for the test in the inner loop of the char data testing
 *
 * Lookup table indexed by byte value. An entry is either the byte itself
 * or 0x00. The zeroed entries are all bytes below 0x20 except 0x9 (so CR
 * and LF are zeroed too), '&' (0x26), '<' (0x3C), ']' (0x5D) and every
 * non-ASCII byte (0x80 and above).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 is kept, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is zeroed */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is zeroed */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is zeroed */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, all zeroed */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4335
Owen Taylor3473f882001-02-23 17:55:21 +00004336/**
4337 * xmlParseCharData:
4338 * @ctxt: an XML parser context
4339 * @cdata: int indicating whether we are within a CDATA section
4340 *
4341 * parse a CharData section.
4342 * if we are within a CDATA section ']]>' marks an end of section.
4343 *
4344 * The right angle bracket (>) may be represented using the string "&gt;",
4345 * and must, for compatibility, be escaped using "&gt;" or a character
4346 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004347 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004348 *
4349 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4350 */
4351
4352void
4353xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004354 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004355 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004356 int line = ctxt->input->line;
4357 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004358 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004359
4360 SHRINK;
4361 GROW;
4362 /*
4363 * Accelerated common case where input don't need to be
4364 * modified before passing it to the handler.
4365 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004366 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004367 in = ctxt->input->cur;
4368 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004369get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004370 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004371 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004372 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004373 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004374 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004375 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004376 goto get_more_space;
4377 }
4378 if (*in == '<') {
4379 nbchar = in - ctxt->input->cur;
4380 if (nbchar > 0) {
4381 const xmlChar *tmp = ctxt->input->cur;
4382 ctxt->input->cur = in;
4383
Daniel Veillard34099b42004-11-04 17:34:35 +00004384 if ((ctxt->sax != NULL) &&
4385 (ctxt->sax->ignorableWhitespace !=
4386 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004387 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004388 if (ctxt->sax->ignorableWhitespace != NULL)
4389 ctxt->sax->ignorableWhitespace(ctxt->userData,
4390 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004391 } else {
4392 if (ctxt->sax->characters != NULL)
4393 ctxt->sax->characters(ctxt->userData,
4394 tmp, nbchar);
4395 if (*ctxt->space == -1)
4396 *ctxt->space = -2;
4397 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004398 } else if ((ctxt->sax != NULL) &&
4399 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004400 ctxt->sax->characters(ctxt->userData,
4401 tmp, nbchar);
4402 }
4403 }
4404 return;
4405 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004406
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004407get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004408 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004409 while (test_char_data[*in]) {
4410 in++;
4411 ccol++;
4412 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004413 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004414 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004415 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004416 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004417 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004418 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004419 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004420 }
4421 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004422 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004423 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Nick Wellnhofer52ceced2017-07-01 17:49:30 +02004424 ctxt->input->cur = in + 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004425 return;
4426 }
4427 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004428 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004429 goto get_more;
4430 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004431 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004432 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004433 if ((ctxt->sax != NULL) &&
4434 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004435 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004436 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004437 const xmlChar *tmp = ctxt->input->cur;
4438 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004439
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004440 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004441 if (ctxt->sax->ignorableWhitespace != NULL)
4442 ctxt->sax->ignorableWhitespace(ctxt->userData,
4443 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004444 } else {
4445 if (ctxt->sax->characters != NULL)
4446 ctxt->sax->characters(ctxt->userData,
4447 tmp, nbchar);
4448 if (*ctxt->space == -1)
4449 *ctxt->space = -2;
4450 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004451 line = ctxt->input->line;
4452 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004453 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004454 if (ctxt->sax->characters != NULL)
4455 ctxt->sax->characters(ctxt->userData,
4456 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004457 line = ctxt->input->line;
4458 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004459 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004460 /* something really bad happened in the SAX callback */
4461 if (ctxt->instate != XML_PARSER_CONTENT)
4462 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004463 }
4464 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004465 if (*in == 0xD) {
4466 in++;
4467 if (*in == 0xA) {
4468 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004469 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004470 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004471 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004472 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004473 in--;
4474 }
4475 if (*in == '<') {
4476 return;
4477 }
4478 if (*in == '&') {
4479 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004480 }
4481 SHRINK;
4482 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004483 if (ctxt->instate == XML_PARSER_EOF)
4484 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004485 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004486 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004487 nbchar = 0;
4488 }
Daniel Veillard50582112001-03-26 22:52:16 +00004489 ctxt->input->line = line;
4490 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004491 xmlParseCharDataComplex(ctxt, cdata);
4492}
4493
Daniel Veillard01c13b52002-12-10 15:19:08 +00004494/**
4495 * xmlParseCharDataComplex:
4496 * @ctxt: an XML parser context
4497 * @cdata: int indicating whether we are within a CDATA section
4498 *
4499 * parse a CharData section.this is the fallback function
4500 * of xmlParseCharData() when the parsing requires handling
4501 * of non-ASCII characters.
4502 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004503static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004504xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004505 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4506 int nbchar = 0;
4507 int cur, l;
4508 int count = 0;
4509
4510 SHRINK;
4511 GROW;
4512 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004513 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004514 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004515 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004516 if ((cur == ']') && (NXT(1) == ']') &&
4517 (NXT(2) == '>')) {
4518 if (cdata) break;
4519 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004520 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004521 }
4522 }
4523 COPY_BUF(l,buf,nbchar,cur);
4524 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004525 buf[nbchar] = 0;
4526
Owen Taylor3473f882001-02-23 17:55:21 +00004527 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004528 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004529 */
4530 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004531 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004532 if (ctxt->sax->ignorableWhitespace != NULL)
4533 ctxt->sax->ignorableWhitespace(ctxt->userData,
4534 buf, nbchar);
4535 } else {
4536 if (ctxt->sax->characters != NULL)
4537 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004538 if ((ctxt->sax->characters !=
4539 ctxt->sax->ignorableWhitespace) &&
4540 (*ctxt->space == -1))
4541 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004542 }
4543 }
4544 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004545 /* something really bad happened in the SAX callback */
4546 if (ctxt->instate != XML_PARSER_CONTENT)
4547 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004548 }
4549 count++;
4550 if (count > 50) {
4551 GROW;
4552 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004553 if (ctxt->instate == XML_PARSER_EOF)
4554 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004555 }
4556 NEXTL(l);
4557 cur = CUR_CHAR(l);
4558 }
4559 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004560 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004561 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004562 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004563 */
4564 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004565 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004566 if (ctxt->sax->ignorableWhitespace != NULL)
4567 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4568 } else {
4569 if (ctxt->sax->characters != NULL)
4570 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004571 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4572 (*ctxt->space == -1))
4573 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004574 }
4575 }
4576 }
Nick Wellnhofer52ceced2017-07-01 17:49:30 +02004577 if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004578 /* Generate the error and skip the offending character */
4579 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4580 "PCDATA invalid Char value %d\n",
4581 cur);
4582 NEXTL(l);
4583 }
Owen Taylor3473f882001-02-23 17:55:21 +00004584}
4585
4586/**
4587 * xmlParseExternalID:
4588 * @ctxt: an XML parser context
4589 * @publicID: a xmlChar** receiving PubidLiteral
4590 * @strict: indicate whether we should restrict parsing to only
4591 * production [75], see NOTE below
4592 *
4593 * Parse an External ID or a Public ID
4594 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004595 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004596 * 'PUBLIC' S PubidLiteral S SystemLiteral
4597 *
4598 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4599 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4600 *
4601 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4602 *
4603 * Returns the function returns SystemLiteral and in the second
4604 * case publicID receives PubidLiteral, is strict is off
4605 * it is possible to return NULL and have publicID set.
4606 */
4607
4608xmlChar *
4609xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4610 xmlChar *URI = NULL;
4611
4612 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004613
4614 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004615 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004616 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004617 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004618 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4619 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004620 }
Owen Taylor3473f882001-02-23 17:55:21 +00004621 URI = xmlParseSystemLiteral(ctxt);
4622 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004623 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004624 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004625 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004626 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004627 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004629 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004630 }
Owen Taylor3473f882001-02-23 17:55:21 +00004631 *publicID = xmlParsePubidLiteral(ctxt);
4632 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004633 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004634 }
4635 if (strict) {
4636 /*
4637 * We don't handle [83] so "S SystemLiteral" is required.
4638 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004639 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004640 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004641 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004642 }
4643 } else {
4644 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004645 * We handle [83] so we return immediately, if
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004646 * "S SystemLiteral" is not detected. We skip blanks if no
4647 * system literal was found, but this is harmless since we must
4648 * be at the end of a NotationDecl.
Owen Taylor3473f882001-02-23 17:55:21 +00004649 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004650 if (SKIP_BLANKS == 0) return(NULL);
4651 if ((CUR != '\'') && (CUR != '"')) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004652 }
Owen Taylor3473f882001-02-23 17:55:21 +00004653 URI = xmlParseSystemLiteral(ctxt);
4654 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004655 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004656 }
4657 }
4658 return(URI);
4659}
4660
4661/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004662 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004663 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004664 * @buf: the already parsed part of the buffer
4665 * @len: number of bytes filles in the buffer
4666 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004667 *
4668 * Skip an XML (SGML) comment <!-- .... -->
4669 * The spec says that "For compatibility, the string "--" (double-hyphen)
4670 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004671 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004672 *
4673 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4674 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004675static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004676xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4677 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004678 int q, ql;
4679 int r, rl;
4680 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004681 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004682 int inputid;
4683
4684 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004685
Owen Taylor3473f882001-02-23 17:55:21 +00004686 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004687 len = 0;
4688 size = XML_PARSER_BUFFER_SIZE;
4689 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4690 if (buf == NULL) {
4691 xmlErrMemory(ctxt, NULL);
4692 return;
4693 }
Owen Taylor3473f882001-02-23 17:55:21 +00004694 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004695 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004696 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004697 if (q == 0)
4698 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004699 if (!IS_CHAR(q)) {
4700 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4701 "xmlParseComment: invalid xmlChar value %d\n",
4702 q);
4703 xmlFree (buf);
4704 return;
4705 }
Owen Taylor3473f882001-02-23 17:55:21 +00004706 NEXTL(ql);
4707 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004708 if (r == 0)
4709 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004710 if (!IS_CHAR(r)) {
4711 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4712 "xmlParseComment: invalid xmlChar value %d\n",
4713 q);
4714 xmlFree (buf);
4715 return;
4716 }
Owen Taylor3473f882001-02-23 17:55:21 +00004717 NEXTL(rl);
4718 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004719 if (cur == 0)
4720 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004721 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004722 ((cur != '>') ||
4723 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004724 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004725 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004726 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004727 if ((len > XML_MAX_TEXT_LENGTH) &&
4728 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4729 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4730 "Comment too big found", NULL);
4731 xmlFree (buf);
4732 return;
4733 }
Owen Taylor3473f882001-02-23 17:55:21 +00004734 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004735 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004736 size_t new_size;
4737
4738 new_size = size * 2;
4739 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004740 if (new_buf == NULL) {
4741 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004742 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004743 return;
4744 }
William M. Bracka3215c72004-07-31 16:24:01 +00004745 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004746 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004747 }
4748 COPY_BUF(ql,buf,len,q);
4749 q = r;
4750 ql = rl;
4751 r = cur;
4752 rl = l;
4753
4754 count++;
4755 if (count > 50) {
4756 GROW;
4757 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004758 if (ctxt->instate == XML_PARSER_EOF) {
4759 xmlFree(buf);
4760 return;
4761 }
Owen Taylor3473f882001-02-23 17:55:21 +00004762 }
4763 NEXTL(l);
4764 cur = CUR_CHAR(l);
4765 if (cur == 0) {
4766 SHRINK;
4767 GROW;
4768 cur = CUR_CHAR(l);
4769 }
4770 }
4771 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004772 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004773 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004774 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004775 } else if (!IS_CHAR(cur)) {
4776 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4777 "xmlParseComment: invalid xmlChar value %d\n",
4778 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004779 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004780 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004781 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004782 "Comment doesn't start and stop in the same"
4783 " entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004784 }
4785 NEXT;
4786 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4787 (!ctxt->disableSAX))
4788 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004789 }
Daniel Veillardda629342007-08-01 07:49:06 +00004790 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004791 return;
4792not_terminated:
4793 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4794 "Comment not terminated\n", NULL);
4795 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004796 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004797}
Daniel Veillardda629342007-08-01 07:49:06 +00004798
Daniel Veillard4c778d82005-01-23 17:37:44 +00004799/**
4800 * xmlParseComment:
4801 * @ctxt: an XML parser context
4802 *
4803 * Skip an XML (SGML) comment <!-- .... -->
4804 * The spec says that "For compatibility, the string "--" (double-hyphen)
4805 * must not occur within comments. "
4806 *
4807 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4808 */
4809void
4810xmlParseComment(xmlParserCtxtPtr ctxt) {
4811 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004812 size_t size = XML_PARSER_BUFFER_SIZE;
4813 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004814 xmlParserInputState state;
4815 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004816 size_t nbchar = 0;
4817 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004818 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004819
4820 /*
4821 * Check that there is a comment right here.
4822 */
4823 if ((RAW != '<') || (NXT(1) != '!') ||
4824 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004825 state = ctxt->instate;
4826 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004827 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004828 SKIP(4);
4829 SHRINK;
4830 GROW;
4831
4832 /*
4833 * Accelerated common case where input don't need to be
4834 * modified before passing it to the handler.
4835 */
4836 in = ctxt->input->cur;
4837 do {
4838 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004839 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004840 ctxt->input->line++; ctxt->input->col = 1;
4841 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004842 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004843 }
4844get_more:
4845 ccol = ctxt->input->col;
4846 while (((*in > '-') && (*in <= 0x7F)) ||
4847 ((*in >= 0x20) && (*in < '-')) ||
4848 (*in == 0x09)) {
4849 in++;
4850 ccol++;
4851 }
4852 ctxt->input->col = ccol;
4853 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004854 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004855 ctxt->input->line++; ctxt->input->col = 1;
4856 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004857 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004858 goto get_more;
4859 }
4860 nbchar = in - ctxt->input->cur;
4861 /*
4862 * save current set of data
4863 */
4864 if (nbchar > 0) {
4865 if ((ctxt->sax != NULL) &&
4866 (ctxt->sax->comment != NULL)) {
4867 if (buf == NULL) {
4868 if ((*in == '-') && (in[1] == '-'))
4869 size = nbchar + 1;
4870 else
4871 size = XML_PARSER_BUFFER_SIZE + nbchar;
4872 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4873 if (buf == NULL) {
4874 xmlErrMemory(ctxt, NULL);
4875 ctxt->instate = state;
4876 return;
4877 }
4878 len = 0;
4879 } else if (len + nbchar + 1 >= size) {
4880 xmlChar *new_buf;
4881 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4882 new_buf = (xmlChar *) xmlRealloc(buf,
4883 size * sizeof(xmlChar));
4884 if (new_buf == NULL) {
4885 xmlFree (buf);
4886 xmlErrMemory(ctxt, NULL);
4887 ctxt->instate = state;
4888 return;
4889 }
4890 buf = new_buf;
4891 }
4892 memcpy(&buf[len], ctxt->input->cur, nbchar);
4893 len += nbchar;
4894 buf[len] = 0;
4895 }
4896 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004897 if ((len > XML_MAX_TEXT_LENGTH) &&
4898 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4899 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4900 "Comment too big found", NULL);
4901 xmlFree (buf);
4902 return;
4903 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004904 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004905 if (*in == 0xA) {
4906 in++;
4907 ctxt->input->line++; ctxt->input->col = 1;
4908 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004909 if (*in == 0xD) {
4910 in++;
4911 if (*in == 0xA) {
4912 ctxt->input->cur = in;
4913 in++;
4914 ctxt->input->line++; ctxt->input->col = 1;
4915 continue; /* while */
4916 }
4917 in--;
4918 }
4919 SHRINK;
4920 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004921 if (ctxt->instate == XML_PARSER_EOF) {
4922 xmlFree(buf);
4923 return;
4924 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004925 in = ctxt->input->cur;
4926 if (*in == '-') {
4927 if (in[1] == '-') {
4928 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004929 if (ctxt->input->id != inputid) {
4930 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004931 "comment doesn't start and stop in the"
4932 " same entity\n");
Daniel Veillard051d52c2008-07-29 16:44:59 +00004933 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004934 SKIP(3);
4935 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4936 (!ctxt->disableSAX)) {
4937 if (buf != NULL)
4938 ctxt->sax->comment(ctxt->userData, buf);
4939 else
4940 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4941 }
4942 if (buf != NULL)
4943 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08004944 if (ctxt->instate != XML_PARSER_EOF)
4945 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004946 return;
4947 }
Bryan Henderson8658d272012-05-08 16:39:05 +08004948 if (buf != NULL) {
4949 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4950 "Double hyphen within comment: "
4951 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00004952 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08004953 } else
4954 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4955 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004956 in++;
4957 ctxt->input->col++;
4958 }
4959 in++;
4960 ctxt->input->col++;
4961 goto get_more;
4962 }
4963 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4964 xmlParseCommentComplex(ctxt, buf, len, size);
4965 ctxt->instate = state;
4966 return;
4967}
4968
Owen Taylor3473f882001-02-23 17:55:21 +00004969
4970/**
4971 * xmlParsePITarget:
4972 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004973 *
Owen Taylor3473f882001-02-23 17:55:21 +00004974 * parse the name of a PI
4975 *
4976 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4977 *
4978 * Returns the PITarget name or NULL
4979 */
4980
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004981const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004982xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004983 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004984
4985 name = xmlParseName(ctxt);
4986 if ((name != NULL) &&
4987 ((name[0] == 'x') || (name[0] == 'X')) &&
4988 ((name[1] == 'm') || (name[1] == 'M')) &&
4989 ((name[2] == 'l') || (name[2] == 'L'))) {
4990 int i;
4991 if ((name[0] == 'x') && (name[1] == 'm') &&
4992 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004993 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004994 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004995 return(name);
4996 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004997 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004998 return(name);
4999 }
5000 for (i = 0;;i++) {
5001 if (xmlW3CPIs[i] == NULL) break;
5002 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5003 return(name);
5004 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005005 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5006 "xmlParsePITarget: invalid name prefix 'xml'\n",
5007 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005008 }
Daniel Veillard37334572008-07-31 08:20:02 +00005009 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005010 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005011 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005012 }
Owen Taylor3473f882001-02-23 17:55:21 +00005013 return(name);
5014}
5015
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must be of the form  catalog = "URL"  (single or double
 * quotes), optionally surrounded by blanks. A value where "catalog" is not
 * followed by '=' is ignored silently; any other syntax problem raises the
 * XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *href = NULL;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;

    /* the URL runs up to the matching quote */
    start = ++p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    href = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (href != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, href);
        xmlFree(href);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (href != NULL)
        xmlFree(href);
}
#endif
Owen Taylor3473f882001-02-23 17:55:21 +00005078/**
5079 * xmlParsePI:
5080 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005081 *
Owen Taylor3473f882001-02-23 17:55:21 +00005082 * parse an XML Processing Instruction.
5083 *
5084 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5085 *
5086 * The processing is transfered to SAX once parsed.
5087 */
5088
5089void
5090xmlParsePI(xmlParserCtxtPtr ctxt) {
5091 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005092 size_t len = 0;
5093 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005094 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005095 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005096 xmlParserInputState state;
5097 int count = 0;
5098
5099 if ((RAW == '<') && (NXT(1) == '?')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005100 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005101 state = ctxt->instate;
5102 ctxt->instate = XML_PARSER_PI;
5103 /*
5104 * this is a Processing Instruction.
5105 */
5106 SKIP(2);
5107 SHRINK;
5108
5109 /*
5110 * Parse the target name and check for special support like
5111 * namespace.
5112 */
5113 target = xmlParsePITarget(ctxt);
5114 if (target != NULL) {
5115 if ((RAW == '?') && (NXT(1) == '>')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005116 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005117 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005118 "PI declaration doesn't start and stop in"
5119 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005120 }
5121 SKIP(2);
5122
5123 /*
5124 * SAX: PI detected.
5125 */
5126 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5127 (ctxt->sax->processingInstruction != NULL))
5128 ctxt->sax->processingInstruction(ctxt->userData,
5129 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005130 if (ctxt->instate != XML_PARSER_EOF)
5131 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005132 return;
5133 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005134 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005135 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005136 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005137 ctxt->instate = state;
5138 return;
5139 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005140 if (SKIP_BLANKS == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005141 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5142 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005143 }
Owen Taylor3473f882001-02-23 17:55:21 +00005144 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005145 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005146 ((cur != '?') || (NXT(1) != '>'))) {
5147 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005148 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005149 size_t new_size = size * 2;
5150 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005151 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005152 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005153 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005154 ctxt->instate = state;
5155 return;
5156 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005157 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005158 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005159 }
5160 count++;
5161 if (count > 50) {
5162 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005163 if (ctxt->instate == XML_PARSER_EOF) {
5164 xmlFree(buf);
5165 return;
5166 }
Owen Taylor3473f882001-02-23 17:55:21 +00005167 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005168 if ((len > XML_MAX_TEXT_LENGTH) &&
5169 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5170 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5171 "PI %s too big found", target);
5172 xmlFree(buf);
5173 ctxt->instate = state;
5174 return;
5175 }
Owen Taylor3473f882001-02-23 17:55:21 +00005176 }
5177 COPY_BUF(l,buf,len,cur);
5178 NEXTL(l);
5179 cur = CUR_CHAR(l);
5180 if (cur == 0) {
5181 SHRINK;
5182 GROW;
5183 cur = CUR_CHAR(l);
5184 }
5185 }
Daniel Veillard51304812012-07-19 20:34:26 +08005186 if ((len > XML_MAX_TEXT_LENGTH) &&
5187 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5188 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5189 "PI %s too big found", target);
5190 xmlFree(buf);
5191 ctxt->instate = state;
5192 return;
5193 }
Owen Taylor3473f882001-02-23 17:55:21 +00005194 buf[len] = 0;
5195 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005196 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5197 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005198 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005199 if (inputid != ctxt->input->id) {
5200 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5201 "PI declaration doesn't start and stop in"
5202 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005203 }
5204 SKIP(2);
5205
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005206#ifdef LIBXML_CATALOG_ENABLED
5207 if (((state == XML_PARSER_MISC) ||
5208 (state == XML_PARSER_START)) &&
5209 (xmlStrEqual(target, XML_CATALOG_PI))) {
5210 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5211 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5212 (allow == XML_CATA_ALLOW_ALL))
5213 xmlParseCatalogPI(ctxt, buf);
5214 }
5215#endif
5216
5217
Owen Taylor3473f882001-02-23 17:55:21 +00005218 /*
5219 * SAX: PI detected.
5220 */
5221 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5222 (ctxt->sax->processingInstruction != NULL))
5223 ctxt->sax->processingInstruction(ctxt->userData,
5224 target, buf);
5225 }
5226 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005227 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005228 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005229 }
Chris Evans77404b82011-12-14 16:18:25 +08005230 if (ctxt->instate != XML_PARSER_EOF)
5231 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005232 }
5233}
5234
5235/**
5236 * xmlParseNotationDecl:
5237 * @ctxt: an XML parser context
5238 *
5239 * parse a notation declaration
5240 *
5241 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5242 *
5243 * Hence there is actually 3 choices:
5244 * 'PUBLIC' S PubidLiteral
5245 * 'PUBLIC' S PubidLiteral S SystemLiteral
5246 * and 'SYSTEM' S SystemLiteral
5247 *
5248 * See the NOTE on xmlParseExternalID().
5249 */
5250
5251void
5252xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005253 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005254 xmlChar *Pubid;
5255 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005256
Daniel Veillarda07050d2003-10-19 14:46:32 +00005257 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005258 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005259 SHRINK;
5260 SKIP(10);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005261 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005262 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5263 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005264 return;
5265 }
Owen Taylor3473f882001-02-23 17:55:21 +00005266
Daniel Veillard76d66f42001-05-16 21:05:17 +00005267 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005268 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005269 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005270 return;
5271 }
Daniel Veillard37334572008-07-31 08:20:02 +00005272 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005273 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005274 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005275 name, NULL, NULL);
5276 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005277 if (SKIP_BLANKS == 0) {
5278 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5279 "Space required after the NOTATION name'\n");
5280 return;
5281 }
Owen Taylor3473f882001-02-23 17:55:21 +00005282
5283 /*
5284 * Parse the IDs.
5285 */
5286 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5287 SKIP_BLANKS;
5288
5289 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005290 if (inputid != ctxt->input->id) {
5291 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5292 "Notation declaration doesn't start and stop"
5293 " in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005294 }
5295 NEXT;
5296 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5297 (ctxt->sax->notationDecl != NULL))
5298 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5299 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005300 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005301 }
Owen Taylor3473f882001-02-23 17:55:21 +00005302 if (Systemid != NULL) xmlFree(Systemid);
5303 if (Pubid != NULL) xmlFree(Pubid);
5304 }
5305}
5306
5307/**
5308 * xmlParseEntityDecl:
5309 * @ctxt: an XML parser context
5310 *
5311 * parse <!ENTITY declarations
5312 *
5313 * [70] EntityDecl ::= GEDecl | PEDecl
5314 *
5315 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5316 *
5317 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5318 *
5319 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5320 *
5321 * [74] PEDef ::= EntityValue | ExternalID
5322 *
5323 * [76] NDataDecl ::= S 'NDATA' S Name
5324 *
5325 * [ VC: Notation Declared ]
5326 * The Name must match the declared name of a notation.
5327 */
5328
5329void
5330xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005331 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005332 xmlChar *value = NULL;
5333 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005334 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005335 int isParameter = 0;
5336 xmlChar *orig = NULL;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005337
Daniel Veillard4c778d82005-01-23 17:37:44 +00005338 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005339 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005340 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005341 SHRINK;
5342 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005343 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005344 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5345 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005346 }
Owen Taylor3473f882001-02-23 17:55:21 +00005347
5348 if (RAW == '%') {
5349 NEXT;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005350 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
David Kilzer4472c3a2016-05-13 15:13:17 +08005352 "Space required after '%%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005353 }
Owen Taylor3473f882001-02-23 17:55:21 +00005354 isParameter = 1;
5355 }
5356
Daniel Veillard76d66f42001-05-16 21:05:17 +00005357 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005358 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005359 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5360 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005361 return;
5362 }
Daniel Veillard37334572008-07-31 08:20:02 +00005363 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005364 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005365 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005366 name, NULL, NULL);
5367 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005368 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005369 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5370 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005371 }
Owen Taylor3473f882001-02-23 17:55:21 +00005372
Daniel Veillardf5582f12002-06-11 10:08:16 +00005373 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005374 /*
5375 * handle the various case of definitions...
5376 */
5377 if (isParameter) {
5378 if ((RAW == '"') || (RAW == '\'')) {
5379 value = xmlParseEntityValue(ctxt, &orig);
5380 if (value) {
5381 if ((ctxt->sax != NULL) &&
5382 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5383 ctxt->sax->entityDecl(ctxt->userData, name,
5384 XML_INTERNAL_PARAMETER_ENTITY,
5385 NULL, NULL, value);
5386 }
5387 } else {
5388 URI = xmlParseExternalID(ctxt, &literal, 1);
5389 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005390 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005391 }
5392 if (URI) {
5393 xmlURIPtr uri;
5394
5395 uri = xmlParseURI((const char *) URI);
5396 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005397 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5398 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005399 /*
5400 * This really ought to be a well formedness error
5401 * but the XML Core WG decided otherwise c.f. issue
5402 * E26 of the XML erratas.
5403 */
Owen Taylor3473f882001-02-23 17:55:21 +00005404 } else {
5405 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005406 /*
5407 * Okay this is foolish to block those but not
5408 * invalid URIs.
5409 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005410 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005411 } else {
5412 if ((ctxt->sax != NULL) &&
5413 (!ctxt->disableSAX) &&
5414 (ctxt->sax->entityDecl != NULL))
5415 ctxt->sax->entityDecl(ctxt->userData, name,
5416 XML_EXTERNAL_PARAMETER_ENTITY,
5417 literal, URI, NULL);
5418 }
5419 xmlFreeURI(uri);
5420 }
5421 }
5422 }
5423 } else {
5424 if ((RAW == '"') || (RAW == '\'')) {
5425 value = xmlParseEntityValue(ctxt, &orig);
5426 if ((ctxt->sax != NULL) &&
5427 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5428 ctxt->sax->entityDecl(ctxt->userData, name,
5429 XML_INTERNAL_GENERAL_ENTITY,
5430 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005431 /*
5432 * For expat compatibility in SAX mode.
5433 */
5434 if ((ctxt->myDoc == NULL) ||
5435 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5436 if (ctxt->myDoc == NULL) {
5437 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005438 if (ctxt->myDoc == NULL) {
5439 xmlErrMemory(ctxt, "New Doc failed");
5440 return;
5441 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005442 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005443 }
5444 if (ctxt->myDoc->intSubset == NULL)
5445 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5446 BAD_CAST "fake", NULL, NULL);
5447
Daniel Veillard1af9a412003-08-20 22:54:39 +00005448 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5449 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005450 }
Owen Taylor3473f882001-02-23 17:55:21 +00005451 } else {
5452 URI = xmlParseExternalID(ctxt, &literal, 1);
5453 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005454 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005455 }
5456 if (URI) {
5457 xmlURIPtr uri;
5458
5459 uri = xmlParseURI((const char *)URI);
5460 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005461 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5462 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005463 /*
5464 * This really ought to be a well formedness error
5465 * but the XML Core WG decided otherwise c.f. issue
5466 * E26 of the XML erratas.
5467 */
Owen Taylor3473f882001-02-23 17:55:21 +00005468 } else {
5469 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005470 /*
5471 * Okay this is foolish to block those but not
5472 * invalid URIs.
5473 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005474 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005475 }
5476 xmlFreeURI(uri);
5477 }
5478 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005479 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005480 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5481 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005482 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005483 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005484 SKIP(5);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005485 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005486 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5487 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005488 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005489 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005490 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5491 (ctxt->sax->unparsedEntityDecl != NULL))
5492 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5493 literal, URI, ndata);
5494 } else {
5495 if ((ctxt->sax != NULL) &&
5496 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5497 ctxt->sax->entityDecl(ctxt->userData, name,
5498 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5499 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005500 /*
5501 * For expat compatibility in SAX mode.
5502 * assuming the entity repalcement was asked for
5503 */
5504 if ((ctxt->replaceEntities != 0) &&
5505 ((ctxt->myDoc == NULL) ||
5506 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5507 if (ctxt->myDoc == NULL) {
5508 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005509 if (ctxt->myDoc == NULL) {
5510 xmlErrMemory(ctxt, "New Doc failed");
5511 return;
5512 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005513 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005514 }
5515
5516 if (ctxt->myDoc->intSubset == NULL)
5517 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5518 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005519 xmlSAX2EntityDecl(ctxt, name,
5520 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5521 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005522 }
Owen Taylor3473f882001-02-23 17:55:21 +00005523 }
5524 }
5525 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005526 if (ctxt->instate == XML_PARSER_EOF)
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005527 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00005528 SKIP_BLANKS;
5529 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005530 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005531 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005532 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005533 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005534 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005535 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005536 "Entity declaration doesn't start and stop in"
5537 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005538 }
5539 NEXT;
5540 }
5541 if (orig != NULL) {
5542 /*
5543 * Ugly mechanism to save the raw entity value.
5544 */
5545 xmlEntityPtr cur = NULL;
5546
5547 if (isParameter) {
5548 if ((ctxt->sax != NULL) &&
5549 (ctxt->sax->getParameterEntity != NULL))
5550 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5551 } else {
5552 if ((ctxt->sax != NULL) &&
5553 (ctxt->sax->getEntity != NULL))
5554 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005555 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005556 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005557 }
Owen Taylor3473f882001-02-23 17:55:21 +00005558 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005559 if ((cur != NULL) && (cur->orig == NULL)) {
5560 cur->orig = orig;
5561 orig = NULL;
5562 }
Owen Taylor3473f882001-02-23 17:55:21 +00005563 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005564
5565done:
Owen Taylor3473f882001-02-23 17:55:21 +00005566 if (value != NULL) xmlFree(value);
5567 if (URI != NULL) xmlFree(URI);
5568 if (literal != NULL) xmlFree(literal);
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005569 if (orig != NULL) xmlFree(orig);
Owen Taylor3473f882001-02-23 17:55:21 +00005570 }
5571}
5572
5573/**
5574 * xmlParseDefaultDecl:
5575 * @ctxt: an XML parser context
5576 * @value: Receive a possible fixed default value for the attribute
5577 *
5578 * Parse an attribute default declaration
5579 *
5580 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5581 *
5582 * [ VC: Required Attribute ]
5583 * if the default declaration is the keyword #REQUIRED, then the
5584 * attribute must be specified for all elements of the type in the
5585 * attribute-list declaration.
5586 *
5587 * [ VC: Attribute Default Legal ]
5588 * The declared default value must meet the lexical constraints of
5589 * the declared attribute type c.f. xmlValidateAttributeDecl()
5590 *
5591 * [ VC: Fixed Attribute Default ]
5592 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005593 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005594 *
5595 * [ WFC: No < in Attribute Values ]
5596 * handled in xmlParseAttValue()
5597 *
5598 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005599 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005600 */
5601
5602int
5603xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5604 int val;
5605 xmlChar *ret;
5606
5607 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005608 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005609 SKIP(9);
5610 return(XML_ATTRIBUTE_REQUIRED);
5611 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005612 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005613 SKIP(8);
5614 return(XML_ATTRIBUTE_IMPLIED);
5615 }
5616 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005617 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005618 SKIP(6);
5619 val = XML_ATTRIBUTE_FIXED;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005620 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005621 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5622 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005623 }
Owen Taylor3473f882001-02-23 17:55:21 +00005624 }
5625 ret = xmlParseAttValue(ctxt);
5626 ctxt->instate = XML_PARSER_DTD;
5627 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005628 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005629 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005630 } else
5631 *value = ret;
5632 return(val);
5633}
5634
5635/**
5636 * xmlParseNotationType:
5637 * @ctxt: an XML parser context
5638 *
5639 * parse an Notation attribute type.
5640 *
5641 * Note: the leading 'NOTATION' S part has already being parsed...
5642 *
5643 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5644 *
5645 * [ VC: Notation Attributes ]
5646 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005647 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005648 *
5649 * Returns: the notation attribute tree built while parsing
5650 */
5651
5652xmlEnumerationPtr
5653xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005654 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005655 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005656
5657 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005658 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005659 return(NULL);
5660 }
5661 SHRINK;
5662 do {
5663 NEXT;
5664 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005665 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005666 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005667 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5668 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005669 xmlFreeEnumeration(ret);
5670 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005671 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005672 tmp = ret;
5673 while (tmp != NULL) {
5674 if (xmlStrEqual(name, tmp->name)) {
5675 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5676 "standalone: attribute notation value token %s duplicated\n",
5677 name, NULL);
5678 if (!xmlDictOwns(ctxt->dict, name))
5679 xmlFree((xmlChar *) name);
5680 break;
5681 }
5682 tmp = tmp->next;
5683 }
5684 if (tmp == NULL) {
5685 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005686 if (cur == NULL) {
5687 xmlFreeEnumeration(ret);
5688 return(NULL);
5689 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005690 if (last == NULL) ret = last = cur;
5691 else {
5692 last->next = cur;
5693 last = cur;
5694 }
Owen Taylor3473f882001-02-23 17:55:21 +00005695 }
5696 SKIP_BLANKS;
5697 } while (RAW == '|');
5698 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005699 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005700 xmlFreeEnumeration(ret);
5701 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005702 }
5703 NEXT;
5704 return(ret);
5705}
5706
5707/**
5708 * xmlParseEnumerationType:
5709 * @ctxt: an XML parser context
5710 *
5711 * parse an Enumeration attribute type.
5712 *
5713 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5714 *
5715 * [ VC: Enumeration ]
5716 * Values of this type must match one of the Nmtoken tokens in
5717 * the declaration
5718 *
5719 * Returns: the enumeration attribute tree built while parsing
5720 */
5721
5722xmlEnumerationPtr
5723xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5724 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005725 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005726
5727 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005728 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005729 return(NULL);
5730 }
5731 SHRINK;
5732 do {
5733 NEXT;
5734 SKIP_BLANKS;
5735 name = xmlParseNmtoken(ctxt);
5736 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005737 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005738 return(ret);
5739 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005740 tmp = ret;
5741 while (tmp != NULL) {
5742 if (xmlStrEqual(name, tmp->name)) {
5743 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5744 "standalone: attribute enumeration value token %s duplicated\n",
5745 name, NULL);
5746 if (!xmlDictOwns(ctxt->dict, name))
5747 xmlFree(name);
5748 break;
5749 }
5750 tmp = tmp->next;
5751 }
5752 if (tmp == NULL) {
5753 cur = xmlCreateEnumeration(name);
5754 if (!xmlDictOwns(ctxt->dict, name))
5755 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005756 if (cur == NULL) {
5757 xmlFreeEnumeration(ret);
5758 return(NULL);
5759 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005760 if (last == NULL) ret = last = cur;
5761 else {
5762 last->next = cur;
5763 last = cur;
5764 }
Owen Taylor3473f882001-02-23 17:55:21 +00005765 }
5766 SKIP_BLANKS;
5767 } while (RAW == '|');
5768 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005769 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005770 return(ret);
5771 }
5772 NEXT;
5773 return(ret);
5774}
5775
5776/**
5777 * xmlParseEnumeratedType:
5778 * @ctxt: an XML parser context
5779 * @tree: the enumeration tree built while parsing
5780 *
5781 * parse an Enumerated attribute type.
5782 *
5783 * [57] EnumeratedType ::= NotationType | Enumeration
5784 *
5785 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5786 *
5787 *
5788 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5789 */
5790
5791int
5792xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005793 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005794 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005795 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005796 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5797 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005798 return(0);
5799 }
Owen Taylor3473f882001-02-23 17:55:21 +00005800 *tree = xmlParseNotationType(ctxt);
5801 if (*tree == NULL) return(0);
5802 return(XML_ATTRIBUTE_NOTATION);
5803 }
5804 *tree = xmlParseEnumerationType(ctxt);
5805 if (*tree == NULL) return(0);
5806 return(XML_ATTRIBUTE_ENUMERATION);
5807}
5808
5809/**
5810 * xmlParseAttributeType:
5811 * @ctxt: an XML parser context
5812 * @tree: the enumeration tree built while parsing
5813 *
5814 * parse the Attribute list def for an element
5815 *
5816 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5817 *
5818 * [55] StringType ::= 'CDATA'
5819 *
5820 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5821 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5822 *
5823 * Validity constraints for attribute values syntax are checked in
5824 * xmlValidateAttributeValue()
5825 *
5826 * [ VC: ID ]
5827 * Values of type ID must match the Name production. A name must not
5828 * appear more than once in an XML document as a value of this type;
5829 * i.e., ID values must uniquely identify the elements which bear them.
5830 *
5831 * [ VC: One ID per Element Type ]
5832 * No element type may have more than one ID attribute specified.
5833 *
5834 * [ VC: ID Attribute Default ]
5835 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5836 *
5837 * [ VC: IDREF ]
5838 * Values of type IDREF must match the Name production, and values
5839 * of type IDREFS must match Names; each IDREF Name must match the value
5840 * of an ID attribute on some element in the XML document; i.e. IDREF
5841 * values must match the value of some ID attribute.
5842 *
5843 * [ VC: Entity Name ]
5844 * Values of type ENTITY must match the Name production, values
5845 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005846 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005847 *
5848 * [ VC: Name Token ]
5849 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005850 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005851 *
5852 * Returns the attribute type
5853 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005854int
Owen Taylor3473f882001-02-23 17:55:21 +00005855xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5856 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005857 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005858 SKIP(5);
5859 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005860 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005861 SKIP(6);
5862 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005863 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005864 SKIP(5);
5865 return(XML_ATTRIBUTE_IDREF);
5866 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5867 SKIP(2);
5868 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005869 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005870 SKIP(6);
5871 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005872 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005873 SKIP(8);
5874 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005875 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005876 SKIP(8);
5877 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005878 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005879 SKIP(7);
5880 return(XML_ATTRIBUTE_NMTOKEN);
5881 }
5882 return(xmlParseEnumeratedType(ctxt, tree));
5883}
5884
5885/**
5886 * xmlParseAttributeListDecl:
5887 * @ctxt: an XML parser context
5888 *
5889 * : parse the Attribute list def for an element
5890 *
5891 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5892 *
5893 * [53] AttDef ::= S Name S AttType S DefaultDecl
5894 *
5895 */
5896void
5897xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005898 const xmlChar *elemName;
5899 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005900 xmlEnumerationPtr tree;
5901
Daniel Veillarda07050d2003-10-19 14:46:32 +00005902 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005903 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005904
5905 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005906 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005907 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005908 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005909 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005910 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005911 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005912 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5913 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005914 return;
5915 }
5916 SKIP_BLANKS;
5917 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005918 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005919 int type;
5920 int def;
5921 xmlChar *defaultValue = NULL;
5922
5923 GROW;
5924 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005925 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005926 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005927 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5928 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005929 break;
5930 }
5931 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005932 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005933 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005934 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005935 break;
5936 }
Owen Taylor3473f882001-02-23 17:55:21 +00005937
5938 type = xmlParseAttributeType(ctxt, &tree);
5939 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005940 break;
5941 }
5942
5943 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005944 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005945 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5946 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005947 if (tree != NULL)
5948 xmlFreeEnumeration(tree);
5949 break;
5950 }
Owen Taylor3473f882001-02-23 17:55:21 +00005951
5952 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5953 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005954 if (defaultValue != NULL)
5955 xmlFree(defaultValue);
5956 if (tree != NULL)
5957 xmlFreeEnumeration(tree);
5958 break;
5959 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005960 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5961 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005962
5963 GROW;
5964 if (RAW != '>') {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005965 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005966 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005967 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005968 if (defaultValue != NULL)
5969 xmlFree(defaultValue);
5970 if (tree != NULL)
5971 xmlFreeEnumeration(tree);
5972 break;
5973 }
Owen Taylor3473f882001-02-23 17:55:21 +00005974 }
Owen Taylor3473f882001-02-23 17:55:21 +00005975 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5976 (ctxt->sax->attributeDecl != NULL))
5977 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5978 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005979 else if (tree != NULL)
5980 xmlFreeEnumeration(tree);
5981
5982 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005983 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00005984 (def != XML_ATTRIBUTE_REQUIRED)) {
5985 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5986 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005987 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005988 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5989 }
Owen Taylor3473f882001-02-23 17:55:21 +00005990 if (defaultValue != NULL)
5991 xmlFree(defaultValue);
5992 GROW;
5993 }
5994 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005995 if (inputid != ctxt->input->id) {
5996 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5997 "Attribute list declaration doesn't start and"
5998 " stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005999 }
6000 NEXT;
6001 }
Owen Taylor3473f882001-02-23 17:55:21 +00006002 }
6003}
6004
6005/**
6006 * xmlParseElementMixedContentDecl:
6007 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006008 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006009 *
6010 * parse the declaration for a Mixed Element content
6011 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006012 *
Owen Taylor3473f882001-02-23 17:55:21 +00006013 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6014 * '(' S? '#PCDATA' S? ')'
6015 *
6016 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6017 *
6018 * [ VC: No Duplicate Types ]
6019 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006020 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006021 *
6022 * returns: the list of the xmlElementContentPtr describing the element choices
6023 */
6024xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006025xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006026 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006027 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006028
6029 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006030 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006031 SKIP(7);
6032 SKIP_BLANKS;
6033 SHRINK;
6034 if (RAW == ')') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006035 if (ctxt->input->id != inputchk) {
6036 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6037 "Element content declaration doesn't start and"
6038 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006039 }
Owen Taylor3473f882001-02-23 17:55:21 +00006040 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006041 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006042 if (ret == NULL)
6043 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006044 if (RAW == '*') {
6045 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6046 NEXT;
6047 }
6048 return(ret);
6049 }
6050 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006051 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006052 if (ret == NULL) return(NULL);
6053 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006054 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006055 NEXT;
6056 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006057 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006058 if (ret == NULL) return(NULL);
6059 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006060 if (cur != NULL)
6061 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006062 cur = ret;
6063 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006064 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006065 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006066 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006067 if (n->c1 != NULL)
6068 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006069 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006070 if (n != NULL)
6071 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006072 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006073 }
6074 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006075 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006076 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006077 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006078 "xmlParseElementMixedContentDecl : Name expected\n");
Nick Wellnhofer8627e4e2017-05-23 18:11:08 +02006079 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006080 return(NULL);
6081 }
6082 SKIP_BLANKS;
6083 GROW;
6084 }
6085 if ((RAW == ')') && (NXT(1) == '*')) {
6086 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006087 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006088 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006089 if (cur->c2 != NULL)
6090 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006091 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006092 if (ret != NULL)
6093 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006094 if (ctxt->input->id != inputchk) {
6095 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6096 "Element content declaration doesn't start and"
6097 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006098 }
Owen Taylor3473f882001-02-23 17:55:21 +00006099 SKIP(2);
6100 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006101 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006102 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006103 return(NULL);
6104 }
6105
6106 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006107 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006108 }
6109 return(ret);
6110}
6111
6112/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006113 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006114 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006115 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006116 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006117 *
6118 * parse the declaration for a Mixed Element content
6119 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006120 *
Owen Taylor3473f882001-02-23 17:55:21 +00006121 *
6122 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6123 *
6124 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6125 *
6126 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6127 *
6128 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6129 *
6130 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6131 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006132 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006133 * opening or closing parentheses in a choice, seq, or Mixed
6134 * construct is contained in the replacement text for a parameter
6135 * entity, both must be contained in the same replacement text. For
6136 * interoperability, if a parameter-entity reference appears in a
6137 * choice, seq, or Mixed construct, its replacement text should not
6138 * be empty, and neither the first nor last non-blank character of
6139 * the replacement text should be a connector (| or ,).
6140 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006141 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006142 * hierarchy.
6143 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006144static xmlElementContentPtr
6145xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6146 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006147 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006148 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006149 xmlChar type = 0;
6150
Daniel Veillard489f9672009-08-10 16:49:30 +02006151 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6152 (depth > 2048)) {
6153 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6154"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6155 depth);
6156 return(NULL);
6157 }
Owen Taylor3473f882001-02-23 17:55:21 +00006158 SKIP_BLANKS;
6159 GROW;
6160 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006161 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006162
Owen Taylor3473f882001-02-23 17:55:21 +00006163 /* Recurse on first child */
6164 NEXT;
6165 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006166 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6167 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006168 SKIP_BLANKS;
6169 GROW;
6170 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006171 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006172 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006173 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006174 return(NULL);
6175 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006176 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006177 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006178 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006179 return(NULL);
6180 }
Owen Taylor3473f882001-02-23 17:55:21 +00006181 GROW;
6182 if (RAW == '?') {
6183 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6184 NEXT;
6185 } else if (RAW == '*') {
6186 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6187 NEXT;
6188 } else if (RAW == '+') {
6189 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6190 NEXT;
6191 } else {
6192 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6193 }
Owen Taylor3473f882001-02-23 17:55:21 +00006194 GROW;
6195 }
6196 SKIP_BLANKS;
6197 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006198 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006199 /*
6200 * Each loop we parse one separator and one element.
6201 */
6202 if (RAW == ',') {
6203 if (type == 0) type = CUR;
6204
6205 /*
6206 * Detect "Name | Name , Name" error
6207 */
6208 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006209 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006210 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006211 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006212 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006213 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006214 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006215 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006216 return(NULL);
6217 }
6218 NEXT;
6219
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006220 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006221 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006222 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006223 xmlFreeDocElementContent(ctxt->myDoc, last);
6224 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006225 return(NULL);
6226 }
6227 if (last == NULL) {
6228 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006229 if (ret != NULL)
6230 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006231 ret = cur = op;
6232 } else {
6233 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006234 if (op != NULL)
6235 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006236 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006237 if (last != NULL)
6238 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006239 cur =op;
6240 last = NULL;
6241 }
6242 } else if (RAW == '|') {
6243 if (type == 0) type = CUR;
6244
6245 /*
6246 * Detect "Name , Name | Name" error
6247 */
6248 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006249 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006250 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006251 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006252 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006253 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006254 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006255 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006256 return(NULL);
6257 }
6258 NEXT;
6259
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006260 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006261 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006262 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006263 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006264 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006265 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006266 return(NULL);
6267 }
6268 if (last == NULL) {
6269 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006270 if (ret != NULL)
6271 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006272 ret = cur = op;
6273 } else {
6274 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006275 if (op != NULL)
6276 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006277 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006278 if (last != NULL)
6279 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006280 cur =op;
6281 last = NULL;
6282 }
6283 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006284 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006285 if ((last != NULL) && (last != ret))
6286 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006287 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006288 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006289 return(NULL);
6290 }
6291 GROW;
6292 SKIP_BLANKS;
6293 GROW;
6294 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006295 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006296 /* Recurse on second child */
6297 NEXT;
6298 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006299 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6300 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006301 SKIP_BLANKS;
6302 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006303 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006304 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006305 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006306 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006307 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006308 return(NULL);
6309 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006310 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006311 if (last == NULL) {
6312 if (ret != NULL)
6313 xmlFreeDocElementContent(ctxt->myDoc, ret);
6314 return(NULL);
6315 }
Owen Taylor3473f882001-02-23 17:55:21 +00006316 if (RAW == '?') {
6317 last->ocur = XML_ELEMENT_CONTENT_OPT;
6318 NEXT;
6319 } else if (RAW == '*') {
6320 last->ocur = XML_ELEMENT_CONTENT_MULT;
6321 NEXT;
6322 } else if (RAW == '+') {
6323 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6324 NEXT;
6325 } else {
6326 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6327 }
6328 }
6329 SKIP_BLANKS;
6330 GROW;
6331 }
6332 if ((cur != NULL) && (last != NULL)) {
6333 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006334 if (last != NULL)
6335 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006336 }
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006337 if (ctxt->input->id != inputchk) {
6338 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6339 "Element content declaration doesn't start and stop in"
6340 " the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006341 }
Owen Taylor3473f882001-02-23 17:55:21 +00006342 NEXT;
6343 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006344 if (ret != NULL) {
6345 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6346 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6347 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6348 else
6349 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6350 }
Owen Taylor3473f882001-02-23 17:55:21 +00006351 NEXT;
6352 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006353 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006354 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006355 cur = ret;
6356 /*
6357 * Some normalization:
6358 * (a | b* | c?)* == (a | b | c)*
6359 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006360 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006361 if ((cur->c1 != NULL) &&
6362 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6363 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6364 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6365 if ((cur->c2 != NULL) &&
6366 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6367 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6368 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6369 cur = cur->c2;
6370 }
6371 }
Owen Taylor3473f882001-02-23 17:55:21 +00006372 NEXT;
6373 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006374 if (ret != NULL) {
6375 int found = 0;
6376
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006377 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6378 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6379 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006380 else
6381 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006382 /*
6383 * Some normalization:
6384 * (a | b*)+ == (a | b)*
6385 * (a | b?)+ == (a | b)*
6386 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006387 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006388 if ((cur->c1 != NULL) &&
6389 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6390 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6391 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6392 found = 1;
6393 }
6394 if ((cur->c2 != NULL) &&
6395 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6396 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6397 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6398 found = 1;
6399 }
6400 cur = cur->c2;
6401 }
6402 if (found)
6403 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6404 }
Owen Taylor3473f882001-02-23 17:55:21 +00006405 NEXT;
6406 }
6407 return(ret);
6408}
6409
6410/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006411 * xmlParseElementChildrenContentDecl:
6412 * @ctxt: an XML parser context
6413 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006414 *
6415 * parse the declaration for a Mixed Element content
6416 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6417 *
6418 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6419 *
6420 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6421 *
6422 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6423 *
6424 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6425 *
6426 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6427 * TODO Parameter-entity replacement text must be properly nested
6428 * with parenthesized groups. That is to say, if either of the
6429 * opening or closing parentheses in a choice, seq, or Mixed
6430 * construct is contained in the replacement text for a parameter
6431 * entity, both must be contained in the same replacement text. For
6432 * interoperability, if a parameter-entity reference appears in a
6433 * choice, seq, or Mixed construct, its replacement text should not
6434 * be empty, and neither the first nor last non-blank character of
6435 * the replacement text should be a connector (| or ,).
6436 *
6437 * Returns the tree of xmlElementContentPtr describing the element
6438 * hierarchy.
6439 */
6440xmlElementContentPtr
6441xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6442 /* stub left for API/ABI compat */
6443 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6444}
6445
6446/**
Owen Taylor3473f882001-02-23 17:55:21 +00006447 * xmlParseElementContentDecl:
6448 * @ctxt: an XML parser context
6449 * @name: the name of the element being defined.
6450 * @result: the Element Content pointer will be stored here if any
6451 *
6452 * parse the declaration for an Element content either Mixed or Children,
6453 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006454 *
Owen Taylor3473f882001-02-23 17:55:21 +00006455 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6456 *
6457 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6458 */
6459
6460int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006461xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006462 xmlElementContentPtr *result) {
6463
6464 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006465 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006466 int res;
6467
6468 *result = NULL;
6469
6470 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006471 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006472 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006473 return(-1);
6474 }
6475 NEXT;
6476 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006477 if (ctxt->instate == XML_PARSER_EOF)
6478 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006479 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006480 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006481 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006482 res = XML_ELEMENT_TYPE_MIXED;
6483 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006484 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006485 res = XML_ELEMENT_TYPE_ELEMENT;
6486 }
Owen Taylor3473f882001-02-23 17:55:21 +00006487 SKIP_BLANKS;
6488 *result = tree;
6489 return(res);
6490}
6491
6492/**
6493 * xmlParseElementDecl:
6494 * @ctxt: an XML parser context
6495 *
6496 * parse an Element declaration.
6497 *
6498 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6499 *
6500 * [ VC: Unique Element Type Declaration ]
6501 * No element type may be declared more than once
6502 *
6503 * Returns the type of the element, or -1 in case of error
6504 */
6505int
6506xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006507 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006508 int ret = -1;
6509 xmlElementContentPtr content = NULL;
6510
Daniel Veillard4c778d82005-01-23 17:37:44 +00006511 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006512 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006513 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006514
6515 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006516 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006517 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6518 "Space required after 'ELEMENT'\n");
David Kilzer00906752016-01-26 16:57:03 -08006519 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006520 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00006521 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006522 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006523 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6524 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006525 return(-1);
6526 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006527 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006528 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6529 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006530 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00006531 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006532 SKIP(5);
6533 /*
6534 * Element must always be empty.
6535 */
6536 ret = XML_ELEMENT_TYPE_EMPTY;
6537 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6538 (NXT(2) == 'Y')) {
6539 SKIP(3);
6540 /*
6541 * Element is a generic container.
6542 */
6543 ret = XML_ELEMENT_TYPE_ANY;
6544 } else if (RAW == '(') {
6545 ret = xmlParseElementContentDecl(ctxt, name, &content);
6546 } else {
6547 /*
6548 * [ WFC: PEs in Internal Subset ] error handling.
6549 */
6550 if ((RAW == '%') && (ctxt->external == 0) &&
6551 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006552 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006553 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006554 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006555 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006556 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6557 }
Owen Taylor3473f882001-02-23 17:55:21 +00006558 return(-1);
6559 }
6560
6561 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006562
6563 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006564 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006565 if (content != NULL) {
6566 xmlFreeDocElementContent(ctxt->myDoc, content);
6567 }
Owen Taylor3473f882001-02-23 17:55:21 +00006568 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006569 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006570 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006571 "Element declaration doesn't start and stop in"
6572 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006573 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006574
Owen Taylor3473f882001-02-23 17:55:21 +00006575 NEXT;
6576 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006577 (ctxt->sax->elementDecl != NULL)) {
6578 if (content != NULL)
6579 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006580 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6581 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006582 if ((content != NULL) && (content->parent == NULL)) {
6583 /*
6584 * this is a trick: if xmlAddElementDecl is called,
6585 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006586 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006587 * interfaces or change the API/ABI
6588 */
6589 xmlFreeDocElementContent(ctxt->myDoc, content);
6590 }
6591 } else if (content != NULL) {
6592 xmlFreeDocElementContent(ctxt->myDoc, content);
6593 }
Owen Taylor3473f882001-02-23 17:55:21 +00006594 }
Owen Taylor3473f882001-02-23 17:55:21 +00006595 }
6596 return(ret);
6597}
6598
6599/**
Owen Taylor3473f882001-02-23 17:55:21 +00006600 * xmlParseConditionalSections
6601 * @ctxt: an XML parser context
6602 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006603 * [61] conditionalSect ::= includeSect | ignoreSect
6604 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006605 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6606 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6607 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6608 */
6609
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006610static void
Owen Taylor3473f882001-02-23 17:55:21 +00006611xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006612 int id = ctxt->input->id;
6613
Owen Taylor3473f882001-02-23 17:55:21 +00006614 SKIP(3);
6615 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006616 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006617 SKIP(7);
6618 SKIP_BLANKS;
6619 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006620 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006621 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006622 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006623 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006624 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006625 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6626 "All markup of the conditional section is not"
6627 " in the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006628 }
Owen Taylor3473f882001-02-23 17:55:21 +00006629 NEXT;
6630 }
6631 if (xmlParserDebugEntities) {
6632 if ((ctxt->input != NULL) && (ctxt->input->filename))
6633 xmlGenericError(xmlGenericErrorContext,
6634 "%s(%d): ", ctxt->input->filename,
6635 ctxt->input->line);
6636 xmlGenericError(xmlGenericErrorContext,
6637 "Entering INCLUDE Conditional Section\n");
6638 }
6639
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006640 SKIP_BLANKS;
6641 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006642 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6643 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006644 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006645 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006646
6647 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6648 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006649 } else
6650 xmlParseMarkupDecl(ctxt);
6651
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006652 SKIP_BLANKS;
6653 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006654
Daniel Veillardfdc91562002-07-01 21:52:03 +00006655 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006656 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
David Kilzer00906752016-01-26 16:57:03 -08006657 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006658 break;
6659 }
6660 }
6661 if (xmlParserDebugEntities) {
6662 if ((ctxt->input != NULL) && (ctxt->input->filename))
6663 xmlGenericError(xmlGenericErrorContext,
6664 "%s(%d): ", ctxt->input->filename,
6665 ctxt->input->line);
6666 xmlGenericError(xmlGenericErrorContext,
6667 "Leaving INCLUDE Conditional Section\n");
6668 }
6669
Daniel Veillarda07050d2003-10-19 14:46:32 +00006670 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006671 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006672 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006673 int depth = 0;
6674
6675 SKIP(6);
6676 SKIP_BLANKS;
6677 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006678 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006679 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006680 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006681 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006682 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006683 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6684 "All markup of the conditional section is not"
6685 " in the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006686 }
Owen Taylor3473f882001-02-23 17:55:21 +00006687 NEXT;
6688 }
6689 if (xmlParserDebugEntities) {
6690 if ((ctxt->input != NULL) && (ctxt->input->filename))
6691 xmlGenericError(xmlGenericErrorContext,
6692 "%s(%d): ", ctxt->input->filename,
6693 ctxt->input->line);
6694 xmlGenericError(xmlGenericErrorContext,
6695 "Entering IGNORE Conditional Section\n");
6696 }
6697
6698 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006699 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006700 * But disable SAX event generating DTD building in the meantime
6701 */
6702 state = ctxt->disableSAX;
6703 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006704 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006705 ctxt->instate = XML_PARSER_IGNORE;
6706
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006707 while (((depth >= 0) && (RAW != 0)) &&
6708 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006709 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6710 depth++;
6711 SKIP(3);
6712 continue;
6713 }
6714 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6715 if (--depth >= 0) SKIP(3);
6716 continue;
6717 }
6718 NEXT;
6719 continue;
6720 }
6721
6722 ctxt->disableSAX = state;
6723 ctxt->instate = instate;
6724
6725 if (xmlParserDebugEntities) {
6726 if ((ctxt->input != NULL) && (ctxt->input->filename))
6727 xmlGenericError(xmlGenericErrorContext,
6728 "%s(%d): ", ctxt->input->filename,
6729 ctxt->input->line);
6730 xmlGenericError(xmlGenericErrorContext,
6731 "Leaving IGNORE Conditional Section\n");
6732 }
6733
6734 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006735 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006736 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006737 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006738 }
6739
6740 if (RAW == 0)
6741 SHRINK;
6742
6743 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006744 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006745 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006746 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006747 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6748 "All markup of the conditional section is not in"
6749 " the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006750 }
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006751 if ((ctxt-> instate != XML_PARSER_EOF) &&
Daniel Veillard41ac9042015-10-27 10:53:44 +08006752 ((ctxt->input->cur + 3) <= ctxt->input->end))
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006753 SKIP(3);
Owen Taylor3473f882001-02-23 17:55:21 +00006754 }
6755}
6756
6757/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006758 * xmlParseMarkupDecl:
6759 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006760 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006761 * parse Markup declarations
6762 *
6763 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6764 * NotationDecl | PI | Comment
6765 *
6766 * [ VC: Proper Declaration/PE Nesting ]
6767 * Parameter-entity replacement text must be properly nested with
6768 * markup declarations. That is to say, if either the first character
6769 * or the last character of a markup declaration (markupdecl above) is
6770 * contained in the replacement text for a parameter-entity reference,
6771 * both must be contained in the same replacement text.
6772 *
6773 * [ WFC: PEs in Internal Subset ]
6774 * In the internal DTD subset, parameter-entity references can occur
6775 * only where markup declarations can occur, not within markup declarations.
6776 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006777 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006778 */
6779void
6780xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6781 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006782 if (CUR == '<') {
6783 if (NXT(1) == '!') {
6784 switch (NXT(2)) {
6785 case 'E':
6786 if (NXT(3) == 'L')
6787 xmlParseElementDecl(ctxt);
6788 else if (NXT(3) == 'N')
6789 xmlParseEntityDecl(ctxt);
6790 break;
6791 case 'A':
6792 xmlParseAttributeListDecl(ctxt);
6793 break;
6794 case 'N':
6795 xmlParseNotationDecl(ctxt);
6796 break;
6797 case '-':
6798 xmlParseComment(ctxt);
6799 break;
6800 default:
6801 /* there is an error but it will be detected later */
6802 break;
6803 }
6804 } else if (NXT(1) == '?') {
6805 xmlParsePI(ctxt);
6806 }
6807 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006808
6809 /*
6810 * detect requirement to exit there and act accordingly
6811 * and avoid having instate overriden later on
6812 */
6813 if (ctxt->instate == XML_PARSER_EOF)
6814 return;
6815
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006816 /*
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006817 * Conditional sections are allowed from entities included
6818 * by PE References in the internal subset.
6819 */
6820 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6821 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6822 xmlParseConditionalSections(ctxt);
6823 }
6824 }
6825
6826 ctxt->instate = XML_PARSER_DTD;
6827}
6828
6829/**
6830 * xmlParseTextDecl:
6831 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006832 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006833 * parse an XML declaration header for external entities
6834 *
6835 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006836 */
6837
6838void
6839xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6840 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006841 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006842
6843 /*
6844 * We know that '<?xml' is here.
6845 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006846 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006847 SKIP(5);
6848 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006849 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006850 return;
6851 }
6852
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006853 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006854 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6855 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006856 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006857
6858 /*
6859 * We may have the VersionInfo here.
6860 */
6861 version = xmlParseVersionInfo(ctxt);
6862 if (version == NULL)
6863 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006864 else {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006865 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006866 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6867 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006868 }
6869 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006870 ctxt->input->version = version;
6871
6872 /*
6873 * We must have the encoding declaration
6874 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006875 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006876 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6877 /*
6878 * The XML REC instructs us to stop parsing right here
6879 */
6880 return;
6881 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006882 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6883 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6884 "Missing encoding in text declaration\n");
6885 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006886
6887 SKIP_BLANKS;
6888 if ((RAW == '?') && (NXT(1) == '>')) {
6889 SKIP(2);
6890 } else if (RAW == '>') {
6891 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006892 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006893 NEXT;
6894 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006895 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006896 MOVETO_ENDTAG(CUR_PTR);
6897 NEXT;
6898 }
6899}
6900
6901/**
Owen Taylor3473f882001-02-23 17:55:21 +00006902 * xmlParseExternalSubset:
6903 * @ctxt: an XML parser context
6904 * @ExternalID: the external identifier
6905 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006906 *
Owen Taylor3473f882001-02-23 17:55:21 +00006907 * parse Markup declarations from an external subset
6908 *
6909 * [30] extSubset ::= textDecl? extSubsetDecl
6910 *
6911 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6912 */
6913void
6914xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6915 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006916 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006917 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006918
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006919 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006920 (ctxt->input->end - ctxt->input->cur >= 4)) {
6921 xmlChar start[4];
6922 xmlCharEncoding enc;
6923
6924 start[0] = RAW;
6925 start[1] = NXT(1);
6926 start[2] = NXT(2);
6927 start[3] = NXT(3);
6928 enc = xmlDetectCharEncoding(start, 4);
6929 if (enc != XML_CHAR_ENCODING_NONE)
6930 xmlSwitchEncoding(ctxt, enc);
6931 }
6932
Daniel Veillarda07050d2003-10-19 14:46:32 +00006933 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006934 xmlParseTextDecl(ctxt);
6935 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6936 /*
6937 * The XML REC instructs us to stop parsing right here
6938 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08006939 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006940 return;
6941 }
6942 }
6943 if (ctxt->myDoc == NULL) {
6944 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006945 if (ctxt->myDoc == NULL) {
6946 xmlErrMemory(ctxt, "New Doc failed");
6947 return;
6948 }
6949 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006950 }
6951 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6952 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6953
6954 ctxt->instate = XML_PARSER_DTD;
6955 ctxt->external = 1;
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006956 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006957 while (((RAW == '<') && (NXT(1) == '?')) ||
6958 ((RAW == '<') && (NXT(1) == '!')) ||
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006959 (RAW == '%')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006960 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006961 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006962
6963 GROW;
6964 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6965 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006966 } else
6967 xmlParseMarkupDecl(ctxt);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006968 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006969
Daniel Veillardfdc91562002-07-01 21:52:03 +00006970 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006971 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006972 break;
6973 }
6974 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006975
Owen Taylor3473f882001-02-23 17:55:21 +00006976 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006977 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006978 }
6979
6980}
6981
6982/**
6983 * xmlParseReference:
6984 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006985 *
Owen Taylor3473f882001-02-23 17:55:21 +00006986 * parse and handle entity references in content, depending on the SAX
6987 * interface, this may end-up in a call to character() if this is a
6988 * CharRef, a predefined entity, if there is no reference() callback.
6989 * or if the parser was asked to switch to that mode.
6990 *
6991 * [67] Reference ::= EntityRef | CharRef
6992 */
6993void
6994xmlParseReference(xmlParserCtxtPtr ctxt) {
6995 xmlEntityPtr ent;
6996 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006997 int was_checked;
6998 xmlNodePtr list = NULL;
6999 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007000
Daniel Veillard0161e632008-08-28 15:36:32 +00007001
7002 if (RAW != '&')
7003 return;
7004
7005 /*
7006 * Simple case of a CharRef
7007 */
Owen Taylor3473f882001-02-23 17:55:21 +00007008 if (NXT(1) == '#') {
7009 int i = 0;
7010 xmlChar out[10];
7011 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007012 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007013
Daniel Veillarddc171602008-03-26 17:41:38 +00007014 if (value == 0)
7015 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007016 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7017 /*
7018 * So we are using non-UTF-8 buffers
7019 * Check that the char fit on 8bits, if not
7020 * generate a CharRef.
7021 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007022 if (value <= 0xFF) {
7023 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007024 out[1] = 0;
7025 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7026 (!ctxt->disableSAX))
7027 ctxt->sax->characters(ctxt->userData, out, 1);
7028 } else {
7029 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007030 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007031 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007032 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007033 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7034 (!ctxt->disableSAX))
7035 ctxt->sax->reference(ctxt->userData, out);
7036 }
7037 } else {
7038 /*
7039 * Just encode the value in UTF-8
7040 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007041 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007042 out[i] = 0;
7043 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7044 (!ctxt->disableSAX))
7045 ctxt->sax->characters(ctxt->userData, out, i);
7046 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007047 return;
7048 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007049
Daniel Veillard0161e632008-08-28 15:36:32 +00007050 /*
7051 * We are seeing an entity reference
7052 */
7053 ent = xmlParseEntityRef(ctxt);
7054 if (ent == NULL) return;
7055 if (!ctxt->wellFormed)
7056 return;
7057 was_checked = ent->checked;
7058
7059 /* special case of predefined entities */
7060 if ((ent->name == NULL) ||
7061 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7062 val = ent->content;
7063 if (val == NULL) return;
7064 /*
7065 * inline the entity.
7066 */
7067 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7068 (!ctxt->disableSAX))
7069 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7070 return;
7071 }
7072
7073 /*
7074 * The first reference to the entity trigger a parsing phase
7075 * where the ent->children is filled with the result from
7076 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007077 * Note: external parsed entities will not be loaded, it is not
7078 * required for a non-validating parser, unless the parsing option
7079 * of validating, or substituting entities were given. Doing so is
7080 * far more secure as the parser will only process data coming from
7081 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007082 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007083 if (((ent->checked == 0) ||
7084 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007085 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7086 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007087 unsigned long oldnbent = ctxt->nbentities;
7088
7089 /*
7090 * This is a bit hackish but this seems the best
7091 * way to make sure both SAX and DOM entity support
7092 * behaves okay.
7093 */
7094 void *user_data;
7095 if (ctxt->userData == ctxt)
7096 user_data = NULL;
7097 else
7098 user_data = ctxt->userData;
7099
7100 /*
7101 * Check that this entity is well formed
7102 * 4.3.2: An internal general parsed entity is well-formed
7103 * if its replacement text matches the production labeled
7104 * content.
7105 */
7106 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7107 ctxt->depth++;
7108 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7109 user_data, &list);
7110 ctxt->depth--;
7111
7112 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7113 ctxt->depth++;
7114 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7115 user_data, ctxt->depth, ent->URI,
7116 ent->ExternalID, &list);
7117 ctxt->depth--;
7118 } else {
7119 ret = XML_ERR_ENTITY_PE_INTERNAL;
7120 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7121 "invalid entity type found\n", NULL);
7122 }
7123
7124 /*
7125 * Store the number of entities needing parsing for this entity
7126 * content and do checkings
7127 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007128 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7129 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7130 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007131 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007132 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007133 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007134 return;
7135 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007136 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007137 xmlFreeNodeList(list);
7138 return;
7139 }
Owen Taylor3473f882001-02-23 17:55:21 +00007140
Daniel Veillard0161e632008-08-28 15:36:32 +00007141 if ((ret == XML_ERR_OK) && (list != NULL)) {
7142 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7143 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7144 (ent->children == NULL)) {
7145 ent->children = list;
7146 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007147 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007148 * Prune it directly in the generated document
7149 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007150 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007151 if (((list->type == XML_TEXT_NODE) &&
7152 (list->next == NULL)) ||
7153 (ctxt->parseMode == XML_PARSE_READER)) {
7154 list->parent = (xmlNodePtr) ent;
7155 list = NULL;
7156 ent->owner = 1;
7157 } else {
7158 ent->owner = 0;
7159 while (list != NULL) {
7160 list->parent = (xmlNodePtr) ctxt->node;
7161 list->doc = ctxt->myDoc;
7162 if (list->next == NULL)
7163 ent->last = list;
7164 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007165 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007166 list = ent->children;
7167#ifdef LIBXML_LEGACY_ENABLED
7168 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7169 xmlAddEntityReference(ent, list, NULL);
7170#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007171 }
7172 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007173 ent->owner = 1;
7174 while (list != NULL) {
7175 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007176 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007177 if (list->next == NULL)
7178 ent->last = list;
7179 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007180 }
7181 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007182 } else {
7183 xmlFreeNodeList(list);
7184 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007185 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007186 } else if ((ret != XML_ERR_OK) &&
7187 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7188 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7189 "Entity '%s' failed to parse\n", ent->name);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007190 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007191 } else if (list != NULL) {
7192 xmlFreeNodeList(list);
7193 list = NULL;
7194 }
7195 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007196 ent->checked = 2;
David Kilzer3f0627a2017-06-16 21:30:42 +02007197
7198 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7199 was_checked = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007200 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007201 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007202 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007203
Daniel Veillard0161e632008-08-28 15:36:32 +00007204 /*
7205 * Now that the entity content has been gathered
7206 * provide it to the application, this can take different forms based
7207 * on the parsing modes.
7208 */
7209 if (ent->children == NULL) {
7210 /*
7211 * Probably running in SAX mode and the callbacks don't
7212 * build the entity content. So unless we already went
7213 * though parsing for first checking go though the entity
7214 * content to generate callbacks associated to the entity
7215 */
7216 if (was_checked != 0) {
7217 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007218 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007219 * This is a bit hackish but this seems the best
7220 * way to make sure both SAX and DOM entity support
7221 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007222 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007223 if (ctxt->userData == ctxt)
7224 user_data = NULL;
7225 else
7226 user_data = ctxt->userData;
7227
7228 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7229 ctxt->depth++;
7230 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7231 ent->content, user_data, NULL);
7232 ctxt->depth--;
7233 } else if (ent->etype ==
7234 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7235 ctxt->depth++;
7236 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7237 ctxt->sax, user_data, ctxt->depth,
7238 ent->URI, ent->ExternalID, NULL);
7239 ctxt->depth--;
7240 } else {
7241 ret = XML_ERR_ENTITY_PE_INTERNAL;
7242 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7243 "invalid entity type found\n", NULL);
7244 }
7245 if (ret == XML_ERR_ENTITY_LOOP) {
7246 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7247 return;
7248 }
7249 }
7250 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7251 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7252 /*
7253 * Entity reference callback comes second, it's somewhat
7254 * superfluous but a compatibility to historical behaviour
7255 */
7256 ctxt->sax->reference(ctxt->userData, ent->name);
7257 }
7258 return;
7259 }
7260
7261 /*
7262 * If we didn't get any children for the entity being built
7263 */
7264 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7265 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7266 /*
7267 * Create a node.
7268 */
7269 ctxt->sax->reference(ctxt->userData, ent->name);
7270 return;
7271 }
7272
7273 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7274 /*
7275 * There is a problem on the handling of _private for entities
7276 * (bug 155816): Should we copy the content of the field from
7277 * the entity (possibly overwriting some value set by the user
7278 * when a copy is created), should we leave it alone, or should
7279 * we try to take care of different situations? The problem
7280 * is exacerbated by the usage of this field by the xmlReader.
7281 * To fix this bug, we look at _private on the created node
7282 * and, if it's NULL, we copy in whatever was in the entity.
7283 * If it's not NULL we leave it alone. This is somewhat of a
7284 * hack - maybe we should have further tests to determine
7285 * what to do.
7286 */
7287 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7288 /*
7289 * Seems we are generating the DOM content, do
7290 * a simple tree copy for all references except the first
7291 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007292 */
7293 if (((list == NULL) && (ent->owner == 0)) ||
7294 (ctxt->parseMode == XML_PARSE_READER)) {
7295 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7296
7297 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007298 * We are copying here, make sure there is no abuse
7299 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007300 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007301 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7302 return;
7303
7304 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007305 * when operating on a reader, the entities definitions
7306 * are always owning the entities subtree.
7307 if (ctxt->parseMode == XML_PARSE_READER)
7308 ent->owner = 1;
7309 */
7310
7311 cur = ent->children;
7312 while (cur != NULL) {
7313 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7314 if (nw != NULL) {
7315 if (nw->_private == NULL)
7316 nw->_private = cur->_private;
7317 if (firstChild == NULL){
7318 firstChild = nw;
7319 }
7320 nw = xmlAddChild(ctxt->node, nw);
7321 }
7322 if (cur == ent->last) {
7323 /*
7324 * needed to detect some strange empty
7325 * node cases in the reader tests
7326 */
7327 if ((ctxt->parseMode == XML_PARSE_READER) &&
7328 (nw != NULL) &&
7329 (nw->type == XML_ELEMENT_NODE) &&
7330 (nw->children == NULL))
7331 nw->extra = 1;
7332
7333 break;
7334 }
7335 cur = cur->next;
7336 }
7337#ifdef LIBXML_LEGACY_ENABLED
7338 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7339 xmlAddEntityReference(ent, firstChild, nw);
7340#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007341 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007342 xmlNodePtr nw = NULL, cur, next, last,
7343 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007344
7345 /*
7346 * We are copying here, make sure there is no abuse
7347 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007348 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007349 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7350 return;
7351
Daniel Veillard0161e632008-08-28 15:36:32 +00007352 /*
7353 * Copy the entity child list and make it the new
7354 * entity child list. The goal is to make sure any
7355 * ID or REF referenced will be the one from the
7356 * document content and not the entity copy.
7357 */
7358 cur = ent->children;
7359 ent->children = NULL;
7360 last = ent->last;
7361 ent->last = NULL;
7362 while (cur != NULL) {
7363 next = cur->next;
7364 cur->next = NULL;
7365 cur->parent = NULL;
7366 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7367 if (nw != NULL) {
7368 if (nw->_private == NULL)
7369 nw->_private = cur->_private;
7370 if (firstChild == NULL){
7371 firstChild = cur;
7372 }
7373 xmlAddChild((xmlNodePtr) ent, nw);
7374 xmlAddChild(ctxt->node, cur);
7375 }
7376 if (cur == last)
7377 break;
7378 cur = next;
7379 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007380 if (ent->owner == 0)
7381 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007382#ifdef LIBXML_LEGACY_ENABLED
7383 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7384 xmlAddEntityReference(ent, firstChild, nw);
7385#endif /* LIBXML_LEGACY_ENABLED */
7386 } else {
7387 const xmlChar *nbktext;
7388
7389 /*
7390 * the name change is to avoid coalescing of the
7391 * node with a possible previous text one which
7392 * would make ent->children a dangling pointer
7393 */
7394 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7395 -1);
7396 if (ent->children->type == XML_TEXT_NODE)
7397 ent->children->name = nbktext;
7398 if ((ent->last != ent->children) &&
7399 (ent->last->type == XML_TEXT_NODE))
7400 ent->last->name = nbktext;
7401 xmlAddChildList(ctxt->node, ent->children);
7402 }
7403
7404 /*
7405 * This is to avoid a nasty side effect, see
7406 * characters() in SAX.c
7407 */
7408 ctxt->nodemem = 0;
7409 ctxt->nodelen = 0;
7410 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007411 }
7412 }
7413}
7414
7415/**
7416 * xmlParseEntityRef:
7417 * @ctxt: an XML parser context
7418 *
7419 * parse ENTITY references declarations
7420 *
7421 * [68] EntityRef ::= '&' Name ';'
7422 *
7423 * [ WFC: Entity Declared ]
7424 * In a document without any DTD, a document with only an internal DTD
7425 * subset which contains no parameter entity references, or a document
7426 * with "standalone='yes'", the Name given in the entity reference
7427 * must match that in an entity declaration, except that well-formed
7428 * documents need not declare any of the following entities: amp, lt,
7429 * gt, apos, quot. The declaration of a parameter entity must precede
7430 * any reference to it. Similarly, the declaration of a general entity
7431 * must precede any reference to it which appears in a default value in an
7432 * attribute-list declaration. Note that if entities are declared in the
7433 * external subset or in external parameter entities, a non-validating
7434 * processor is not obligated to read and process their declarations;
7435 * for such documents, the rule that an entity must be declared is a
7436 * well-formedness constraint only if standalone='yes'.
7437 *
7438 * [ WFC: Parsed Entity ]
7439 * An entity reference must not contain the name of an unparsed entity
7440 *
7441 * Returns the xmlEntityPtr if found, or NULL otherwise.
7442 */
7443xmlEntityPtr
7444xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007445 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007446 xmlEntityPtr ent = NULL;
7447
7448 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007449 if (ctxt->instate == XML_PARSER_EOF)
7450 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007451
Daniel Veillard0161e632008-08-28 15:36:32 +00007452 if (RAW != '&')
7453 return(NULL);
7454 NEXT;
7455 name = xmlParseName(ctxt);
7456 if (name == NULL) {
7457 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7458 "xmlParseEntityRef: no name\n");
7459 return(NULL);
7460 }
7461 if (RAW != ';') {
7462 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7463 return(NULL);
7464 }
7465 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007466
Daniel Veillard0161e632008-08-28 15:36:32 +00007467 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007468 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007469 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007470 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7471 ent = xmlGetPredefinedEntity(name);
7472 if (ent != NULL)
7473 return(ent);
7474 }
Owen Taylor3473f882001-02-23 17:55:21 +00007475
Daniel Veillard0161e632008-08-28 15:36:32 +00007476 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007477 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007478 */
7479 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007480
Daniel Veillard0161e632008-08-28 15:36:32 +00007481 /*
7482 * Ask first SAX for entity resolution, otherwise try the
7483 * entities which may have stored in the parser context.
7484 */
7485 if (ctxt->sax != NULL) {
7486 if (ctxt->sax->getEntity != NULL)
7487 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007488 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007489 (ctxt->options & XML_PARSE_OLDSAX))
7490 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007491 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7492 (ctxt->userData==ctxt)) {
7493 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007494 }
7495 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007496 if (ctxt->instate == XML_PARSER_EOF)
7497 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007498 /*
7499 * [ WFC: Entity Declared ]
7500 * In a document without any DTD, a document with only an
7501 * internal DTD subset which contains no parameter entity
7502 * references, or a document with "standalone='yes'", the
7503 * Name given in the entity reference must match that in an
7504 * entity declaration, except that well-formed documents
7505 * need not declare any of the following entities: amp, lt,
7506 * gt, apos, quot.
7507 * The declaration of a parameter entity must precede any
7508 * reference to it.
7509 * Similarly, the declaration of a general entity must
7510 * precede any reference to it which appears in a default
7511 * value in an attribute-list declaration. Note that if
7512 * entities are declared in the external subset or in
7513 * external parameter entities, a non-validating processor
7514 * is not obligated to read and process their declarations;
7515 * for such documents, the rule that an entity must be
7516 * declared is a well-formedness constraint only if
7517 * standalone='yes'.
7518 */
7519 if (ent == NULL) {
7520 if ((ctxt->standalone == 1) ||
7521 ((ctxt->hasExternalSubset == 0) &&
7522 (ctxt->hasPErefs == 0))) {
7523 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7524 "Entity '%s' not defined\n", name);
7525 } else {
7526 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7527 "Entity '%s' not defined\n", name);
7528 if ((ctxt->inSubset == 0) &&
7529 (ctxt->sax != NULL) &&
7530 (ctxt->sax->reference != NULL)) {
7531 ctxt->sax->reference(ctxt->userData, name);
7532 }
7533 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007534 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007535 ctxt->valid = 0;
7536 }
7537
7538 /*
7539 * [ WFC: Parsed Entity ]
7540 * An entity reference must not contain the name of an
7541 * unparsed entity
7542 */
7543 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7544 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7545 "Entity reference to unparsed entity %s\n", name);
7546 }
7547
7548 /*
7549 * [ WFC: No External Entity References ]
7550 * Attribute values cannot contain direct or indirect
7551 * entity references to external entities.
7552 */
7553 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7554 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7555 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7556 "Attribute references external entity '%s'\n", name);
7557 }
7558 /*
7559 * [ WFC: No < in Attribute Values ]
7560 * The replacement text of any entity referred to directly or
7561 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007562 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007563 */
7564 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007565 (ent != NULL) &&
7566 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007567 if (((ent->checked & 1) || (ent->checked == 0)) &&
7568 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007569 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7570 "'<' in entity '%s' is not allowed in attributes values\n", name);
7571 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007572 }
7573
7574 /*
7575 * Internal check, no parameter entities here ...
7576 */
7577 else {
7578 switch (ent->etype) {
7579 case XML_INTERNAL_PARAMETER_ENTITY:
7580 case XML_EXTERNAL_PARAMETER_ENTITY:
7581 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7582 "Attempt to reference the parameter entity '%s'\n",
7583 name);
7584 break;
7585 default:
7586 break;
7587 }
7588 }
7589
7590 /*
7591 * [ WFC: No Recursion ]
7592 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007593 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007594 * Done somewhere else
7595 */
Owen Taylor3473f882001-02-23 17:55:21 +00007596 return(ent);
7597}
7598
7599/**
7600 * xmlParseStringEntityRef:
7601 * @ctxt: an XML parser context
7602 * @str: a pointer to an index in the string
7603 *
7604 * parse ENTITY references declarations, but this version parses it from
7605 * a string value.
7606 *
7607 * [68] EntityRef ::= '&' Name ';'
7608 *
7609 * [ WFC: Entity Declared ]
7610 * In a document without any DTD, a document with only an internal DTD
7611 * subset which contains no parameter entity references, or a document
7612 * with "standalone='yes'", the Name given in the entity reference
7613 * must match that in an entity declaration, except that well-formed
7614 * documents need not declare any of the following entities: amp, lt,
7615 * gt, apos, quot. The declaration of a parameter entity must precede
7616 * any reference to it. Similarly, the declaration of a general entity
7617 * must precede any reference to it which appears in a default value in an
7618 * attribute-list declaration. Note that if entities are declared in the
7619 * external subset or in external parameter entities, a non-validating
7620 * processor is not obligated to read and process their declarations;
7621 * for such documents, the rule that an entity must be declared is a
7622 * well-formedness constraint only if standalone='yes'.
7623 *
7624 * [ WFC: Parsed Entity ]
7625 * An entity reference must not contain the name of an unparsed entity
7626 *
7627 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7628 * is updated to the current location in the string.
7629 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007630static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007631xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7632 xmlChar *name;
7633 const xmlChar *ptr;
7634 xmlChar cur;
7635 xmlEntityPtr ent = NULL;
7636
7637 if ((str == NULL) || (*str == NULL))
7638 return(NULL);
7639 ptr = *str;
7640 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007641 if (cur != '&')
7642 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007643
Daniel Veillard0161e632008-08-28 15:36:32 +00007644 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007645 name = xmlParseStringName(ctxt, &ptr);
7646 if (name == NULL) {
7647 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7648 "xmlParseStringEntityRef: no name\n");
7649 *str = ptr;
7650 return(NULL);
7651 }
7652 if (*ptr != ';') {
7653 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007654 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007655 *str = ptr;
7656 return(NULL);
7657 }
7658 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007659
Owen Taylor3473f882001-02-23 17:55:21 +00007660
Daniel Veillard0161e632008-08-28 15:36:32 +00007661 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007662 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007663 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007664 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7665 ent = xmlGetPredefinedEntity(name);
7666 if (ent != NULL) {
7667 xmlFree(name);
7668 *str = ptr;
7669 return(ent);
7670 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007671 }
Owen Taylor3473f882001-02-23 17:55:21 +00007672
Daniel Veillard0161e632008-08-28 15:36:32 +00007673 /*
7674 * Increate the number of entity references parsed
7675 */
7676 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007677
Daniel Veillard0161e632008-08-28 15:36:32 +00007678 /*
7679 * Ask first SAX for entity resolution, otherwise try the
7680 * entities which may have stored in the parser context.
7681 */
7682 if (ctxt->sax != NULL) {
7683 if (ctxt->sax->getEntity != NULL)
7684 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007685 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7686 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007687 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7688 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007689 }
7690 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007691 if (ctxt->instate == XML_PARSER_EOF) {
7692 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007693 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007694 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007695
7696 /*
7697 * [ WFC: Entity Declared ]
7698 * In a document without any DTD, a document with only an
7699 * internal DTD subset which contains no parameter entity
7700 * references, or a document with "standalone='yes'", the
7701 * Name given in the entity reference must match that in an
7702 * entity declaration, except that well-formed documents
7703 * need not declare any of the following entities: amp, lt,
7704 * gt, apos, quot.
7705 * The declaration of a parameter entity must precede any
7706 * reference to it.
7707 * Similarly, the declaration of a general entity must
7708 * precede any reference to it which appears in a default
7709 * value in an attribute-list declaration. Note that if
7710 * entities are declared in the external subset or in
7711 * external parameter entities, a non-validating processor
7712 * is not obligated to read and process their declarations;
7713 * for such documents, the rule that an entity must be
7714 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007715 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007716 */
7717 if (ent == NULL) {
7718 if ((ctxt->standalone == 1) ||
7719 ((ctxt->hasExternalSubset == 0) &&
7720 (ctxt->hasPErefs == 0))) {
7721 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7722 "Entity '%s' not defined\n", name);
7723 } else {
7724 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7725 "Entity '%s' not defined\n",
7726 name);
7727 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007728 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007729 /* TODO ? check regressions ctxt->valid = 0; */
7730 }
7731
7732 /*
7733 * [ WFC: Parsed Entity ]
7734 * An entity reference must not contain the name of an
7735 * unparsed entity
7736 */
7737 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7738 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7739 "Entity reference to unparsed entity %s\n", name);
7740 }
7741
7742 /*
7743 * [ WFC: No External Entity References ]
7744 * Attribute values cannot contain direct or indirect
7745 * entity references to external entities.
7746 */
7747 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7748 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7749 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7750 "Attribute references external entity '%s'\n", name);
7751 }
7752 /*
7753 * [ WFC: No < in Attribute Values ]
7754 * The replacement text of any entity referred to directly or
7755 * indirectly in an attribute value (other than "&lt;") must
7756 * not contain a <.
7757 */
7758 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7759 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007760 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007761 (xmlStrchr(ent->content, '<'))) {
7762 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7763 "'<' in entity '%s' is not allowed in attributes values\n",
7764 name);
7765 }
7766
7767 /*
7768 * Internal check, no parameter entities here ...
7769 */
7770 else {
7771 switch (ent->etype) {
7772 case XML_INTERNAL_PARAMETER_ENTITY:
7773 case XML_EXTERNAL_PARAMETER_ENTITY:
7774 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7775 "Attempt to reference the parameter entity '%s'\n",
7776 name);
7777 break;
7778 default:
7779 break;
7780 }
7781 }
7782
7783 /*
7784 * [ WFC: No Recursion ]
7785 * A parsed entity must not contain a recursive reference
7786 * to itself, either directly or indirectly.
7787 * Done somewhere else
7788 */
7789
7790 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007791 *str = ptr;
7792 return(ent);
7793}
7794
7795/**
7796 * xmlParsePEReference:
7797 * @ctxt: an XML parser context
7798 *
7799 * parse PEReference declarations
7800 * The entity content is handled directly by pushing it's content as
7801 * a new input stream.
7802 *
7803 * [69] PEReference ::= '%' Name ';'
7804 *
7805 * [ WFC: No Recursion ]
7806 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007807 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007808 *
7809 * [ WFC: Entity Declared ]
7810 * In a document without any DTD, a document with only an internal DTD
7811 * subset which contains no parameter entity references, or a document
7812 * with "standalone='yes'", ... ... The declaration of a parameter
7813 * entity must precede any reference to it...
7814 *
7815 * [ VC: Entity Declared ]
7816 * In a document with an external subset or external parameter entities
7817 * with "standalone='no'", ... ... The declaration of a parameter entity
7818 * must precede any reference to it...
7819 *
7820 * [ WFC: In DTD ]
7821 * Parameter-entity references may only appear in the DTD.
7822 * NOTE: misleading but this is handled.
7823 */
7824void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007825xmlParsePEReference(xmlParserCtxtPtr ctxt)
7826{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007827 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007828 xmlEntityPtr entity = NULL;
7829 xmlParserInputPtr input;
7830
Daniel Veillard0161e632008-08-28 15:36:32 +00007831 if (RAW != '%')
7832 return;
7833 NEXT;
7834 name = xmlParseName(ctxt);
7835 if (name == NULL) {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007836 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
Daniel Veillard0161e632008-08-28 15:36:32 +00007837 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007838 }
Nick Wellnhofer03904152017-06-05 21:16:00 +02007839 if (xmlParserDebugEntities)
7840 xmlGenericError(xmlGenericErrorContext,
7841 "PEReference: %s\n", name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007842 if (RAW != ';') {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007843 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007844 return;
7845 }
7846
7847 NEXT;
7848
7849 /*
7850 * Increate the number of entity references parsed
7851 */
7852 ctxt->nbentities++;
7853
7854 /*
7855 * Request the entity from SAX
7856 */
7857 if ((ctxt->sax != NULL) &&
7858 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007859 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7860 if (ctxt->instate == XML_PARSER_EOF)
7861 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007862 if (entity == NULL) {
7863 /*
7864 * [ WFC: Entity Declared ]
7865 * In a document without any DTD, a document with only an
7866 * internal DTD subset which contains no parameter entity
7867 * references, or a document with "standalone='yes'", ...
7868 * ... The declaration of a parameter entity must precede
7869 * any reference to it...
7870 */
7871 if ((ctxt->standalone == 1) ||
7872 ((ctxt->hasExternalSubset == 0) &&
7873 (ctxt->hasPErefs == 0))) {
7874 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7875 "PEReference: %%%s; not found\n",
7876 name);
7877 } else {
7878 /*
7879 * [ VC: Entity Declared ]
7880 * In a document with an external subset or external
7881 * parameter entities with "standalone='no'", ...
7882 * ... The declaration of a parameter entity must
7883 * precede any reference to it...
7884 */
Nick Wellnhofer03904152017-06-05 21:16:00 +02007885 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7886 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7887 "PEReference: %%%s; not found\n",
7888 name, NULL);
7889 } else
7890 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7891 "PEReference: %%%s; not found\n",
7892 name, NULL);
7893 ctxt->valid = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007894 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007895 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007896 } else {
7897 /*
7898 * Internal checking in case the entity quest barfed
7899 */
7900 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7901 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7902 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7903 "Internal: %%%s; is not a parameter entity\n",
7904 name, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007905 } else {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007906 xmlChar start[4];
7907 xmlCharEncoding enc;
7908
Neel Mehta90ccb582017-04-07 17:43:02 +02007909 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7910 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7911 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7912 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7913 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7914 (ctxt->replaceEntities == 0) &&
7915 (ctxt->validate == 0))
7916 return;
7917
Daniel Veillard0161e632008-08-28 15:36:32 +00007918 input = xmlNewEntityInputStream(ctxt, entity);
7919 if (xmlPushInput(ctxt, input) < 0)
7920 return;
Nick Wellnhofer46dc9892017-06-08 02:24:56 +02007921
7922 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7923 /*
7924 * Get the 4 first bytes and decode the charset
7925 * if enc != XML_CHAR_ENCODING_NONE
7926 * plug some encoding conversion routines.
7927 * Note that, since we may have some non-UTF8
7928 * encoding (like UTF16, bug 135229), the 'length'
7929 * is not known, but we can calculate based upon
7930 * the amount of data in the buffer.
7931 */
7932 GROW
7933 if (ctxt->instate == XML_PARSER_EOF)
7934 return;
7935 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7936 start[0] = RAW;
7937 start[1] = NXT(1);
7938 start[2] = NXT(2);
7939 start[3] = NXT(3);
7940 enc = xmlDetectCharEncoding(start, 4);
7941 if (enc != XML_CHAR_ENCODING_NONE) {
7942 xmlSwitchEncoding(ctxt, enc);
7943 }
7944 }
7945
7946 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7947 (IS_BLANK_CH(NXT(5)))) {
7948 xmlParseTextDecl(ctxt);
Nick Wellnhofer03904152017-06-05 21:16:00 +02007949 }
7950 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007951 }
7952 }
7953 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007954}
7955
7956/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007957 * xmlLoadEntityContent:
7958 * @ctxt: an XML parser context
7959 * @entity: an unloaded system entity
7960 *
7961 * Load the original content of the given system entity from the
7962 * ExternalID/SystemID given. This is to be used for Included in Literal
7963 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7964 *
7965 * Returns 0 in case of success and -1 in case of failure
7966 */
7967static int
7968xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7969 xmlParserInputPtr input;
7970 xmlBufferPtr buf;
7971 int l, c;
7972 int count = 0;
7973
7974 if ((ctxt == NULL) || (entity == NULL) ||
7975 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7976 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7977 (entity->content != NULL)) {
7978 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7979 "xmlLoadEntityContent parameter error");
7980 return(-1);
7981 }
7982
7983 if (xmlParserDebugEntities)
7984 xmlGenericError(xmlGenericErrorContext,
7985 "Reading %s entity content input\n", entity->name);
7986
7987 buf = xmlBufferCreate();
7988 if (buf == NULL) {
7989 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7990 "xmlLoadEntityContent parameter error");
7991 return(-1);
7992 }
7993
7994 input = xmlNewEntityInputStream(ctxt, entity);
7995 if (input == NULL) {
7996 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7997 "xmlLoadEntityContent input error");
7998 xmlBufferFree(buf);
7999 return(-1);
8000 }
8001
8002 /*
8003 * Push the entity as the current input, read char by char
8004 * saving to the buffer until the end of the entity or an error
8005 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008006 if (xmlPushInput(ctxt, input) < 0) {
8007 xmlBufferFree(buf);
8008 return(-1);
8009 }
8010
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008011 GROW;
8012 c = CUR_CHAR(l);
8013 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8014 (IS_CHAR(c))) {
8015 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008016 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008017 count = 0;
8018 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008019 if (ctxt->instate == XML_PARSER_EOF) {
8020 xmlBufferFree(buf);
8021 return(-1);
8022 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008023 }
8024 NEXTL(l);
8025 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008026 if (c == 0) {
8027 count = 0;
8028 GROW;
8029 if (ctxt->instate == XML_PARSER_EOF) {
8030 xmlBufferFree(buf);
8031 return(-1);
8032 }
8033 c = CUR_CHAR(l);
8034 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008035 }
8036
8037 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8038 xmlPopInput(ctxt);
8039 } else if (!IS_CHAR(c)) {
8040 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8041 "xmlLoadEntityContent: invalid char value %d\n",
8042 c);
8043 xmlBufferFree(buf);
8044 return(-1);
8045 }
8046 entity->content = buf->content;
8047 buf->content = NULL;
8048 xmlBufferFree(buf);
8049
8050 return(0);
8051}
8052
8053/**
Owen Taylor3473f882001-02-23 17:55:21 +00008054 * xmlParseStringPEReference:
8055 * @ctxt: an XML parser context
8056 * @str: a pointer to an index in the string
8057 *
8058 * parse PEReference declarations
8059 *
8060 * [69] PEReference ::= '%' Name ';'
8061 *
8062 * [ WFC: No Recursion ]
8063 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008064 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008065 *
8066 * [ WFC: Entity Declared ]
8067 * In a document without any DTD, a document with only an internal DTD
8068 * subset which contains no parameter entity references, or a document
8069 * with "standalone='yes'", ... ... The declaration of a parameter
8070 * entity must precede any reference to it...
8071 *
8072 * [ VC: Entity Declared ]
8073 * In a document with an external subset or external parameter entities
8074 * with "standalone='no'", ... ... The declaration of a parameter entity
8075 * must precede any reference to it...
8076 *
8077 * [ WFC: In DTD ]
8078 * Parameter-entity references may only appear in the DTD.
8079 * NOTE: misleading but this is handled.
8080 *
8081 * Returns the string of the entity content.
8082 * str is updated to the current value of the index
8083 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008084static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008085xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8086 const xmlChar *ptr;
8087 xmlChar cur;
8088 xmlChar *name;
8089 xmlEntityPtr entity = NULL;
8090
8091 if ((str == NULL) || (*str == NULL)) return(NULL);
8092 ptr = *str;
8093 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008094 if (cur != '%')
8095 return(NULL);
8096 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008097 name = xmlParseStringName(ctxt, &ptr);
8098 if (name == NULL) {
8099 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8100 "xmlParseStringPEReference: no name\n");
8101 *str = ptr;
8102 return(NULL);
8103 }
8104 cur = *ptr;
8105 if (cur != ';') {
8106 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8107 xmlFree(name);
8108 *str = ptr;
8109 return(NULL);
8110 }
8111 ptr++;
8112
8113 /*
8114 * Increate the number of entity references parsed
8115 */
8116 ctxt->nbentities++;
8117
8118 /*
8119 * Request the entity from SAX
8120 */
8121 if ((ctxt->sax != NULL) &&
8122 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008123 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8124 if (ctxt->instate == XML_PARSER_EOF) {
8125 xmlFree(name);
Nick Wellnhoferfb2f5182017-06-10 17:06:16 +02008126 *str = ptr;
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008127 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008128 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008129 if (entity == NULL) {
8130 /*
8131 * [ WFC: Entity Declared ]
8132 * In a document without any DTD, a document with only an
8133 * internal DTD subset which contains no parameter entity
8134 * references, or a document with "standalone='yes'", ...
8135 * ... The declaration of a parameter entity must precede
8136 * any reference to it...
8137 */
8138 if ((ctxt->standalone == 1) ||
8139 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8140 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8141 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008142 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008143 /*
8144 * [ VC: Entity Declared ]
8145 * In a document with an external subset or external
8146 * parameter entities with "standalone='no'", ...
8147 * ... The declaration of a parameter entity must
8148 * precede any reference to it...
8149 */
8150 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8151 "PEReference: %%%s; not found\n",
8152 name, NULL);
8153 ctxt->valid = 0;
8154 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008155 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008156 } else {
8157 /*
8158 * Internal checking in case the entity quest barfed
8159 */
8160 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8161 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8162 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8163 "%%%s; is not a parameter entity\n",
8164 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008165 }
8166 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008167 ctxt->hasPErefs = 1;
8168 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008169 *str = ptr;
8170 return(entity);
8171}
8172
8173/**
8174 * xmlParseDocTypeDecl:
8175 * @ctxt: an XML parser context
8176 *
8177 * parse a DOCTYPE declaration
8178 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008179 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008180 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8181 *
8182 * [ VC: Root Element Type ]
8183 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008184 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008185 */
8186
8187void
8188xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008189 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008190 xmlChar *ExternalID = NULL;
8191 xmlChar *URI = NULL;
8192
8193 /*
8194 * We know that '<!DOCTYPE' has been detected.
8195 */
8196 SKIP(9);
8197
8198 SKIP_BLANKS;
8199
8200 /*
8201 * Parse the DOCTYPE name.
8202 */
8203 name = xmlParseName(ctxt);
8204 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008205 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8206 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008207 }
8208 ctxt->intSubName = name;
8209
8210 SKIP_BLANKS;
8211
8212 /*
8213 * Check for SystemID and ExternalID
8214 */
8215 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8216
8217 if ((URI != NULL) || (ExternalID != NULL)) {
8218 ctxt->hasExternalSubset = 1;
8219 }
8220 ctxt->extSubURI = URI;
8221 ctxt->extSubSystem = ExternalID;
8222
8223 SKIP_BLANKS;
8224
8225 /*
8226 * Create and update the internal subset.
8227 */
8228 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8229 (!ctxt->disableSAX))
8230 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008231 if (ctxt->instate == XML_PARSER_EOF)
8232 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008233
8234 /*
8235 * Is there any internal subset declarations ?
8236 * they are handled separately in xmlParseInternalSubset()
8237 */
8238 if (RAW == '[')
8239 return;
8240
8241 /*
8242 * We should be at the end of the DOCTYPE declaration.
8243 */
8244 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008245 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008246 }
8247 NEXT;
8248}
8249
8250/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008251 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008252 * @ctxt: an XML parser context
8253 *
8254 * parse the internal subset declaration
8255 *
8256 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8257 */
8258
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008259static void
Owen Taylor3473f882001-02-23 17:55:21 +00008260xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8261 /*
8262 * Is there any DTD definition ?
8263 */
8264 if (RAW == '[') {
8265 ctxt->instate = XML_PARSER_DTD;
8266 NEXT;
8267 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008268 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008269 * PEReferences.
8270 * Subsequence (markupdecl | PEReference | S)*
8271 */
Nick Wellnhofer453dff12017-06-19 17:55:20 +02008272 while (((RAW != ']') || (ctxt->inputNr > 1)) &&
8273 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008274 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008275 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008276
8277 SKIP_BLANKS;
8278 xmlParseMarkupDecl(ctxt);
8279 xmlParsePEReference(ctxt);
8280
Owen Taylor3473f882001-02-23 17:55:21 +00008281 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008282 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008283 "xmlParseInternalSubset: error detected in Markup declaration\n");
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02008284 if (ctxt->inputNr > 1)
8285 xmlPopInput(ctxt);
8286 else
8287 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008288 }
8289 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008290 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008291 NEXT;
8292 SKIP_BLANKS;
8293 }
8294 }
8295
8296 /*
8297 * We should be at the end of the DOCTYPE declaration.
8298 */
8299 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008300 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Daniel Veillarda7a94612016-02-09 12:55:29 +01008301 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008302 }
8303 NEXT;
8304}
8305
Daniel Veillard81273902003-09-30 00:43:48 +00008306#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008307/**
8308 * xmlParseAttribute:
8309 * @ctxt: an XML parser context
8310 * @value: a xmlChar ** used to store the value of the attribute
8311 *
8312 * parse an attribute
8313 *
8314 * [41] Attribute ::= Name Eq AttValue
8315 *
8316 * [ WFC: No External Entity References ]
8317 * Attribute values cannot contain direct or indirect entity references
8318 * to external entities.
8319 *
8320 * [ WFC: No < in Attribute Values ]
8321 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008322 * an attribute value (other than "&lt;") must not contain a <.
8323 *
Owen Taylor3473f882001-02-23 17:55:21 +00008324 * [ VC: Attribute Value Type ]
8325 * The attribute must have been declared; the value must be of the type
8326 * declared for it.
8327 *
8328 * [25] Eq ::= S? '=' S?
8329 *
8330 * With namespace:
8331 *
8332 * [NS 11] Attribute ::= QName Eq AttValue
8333 *
8334 * Also the case QName == xmlns:??? is handled independently as a namespace
8335 * definition.
8336 *
8337 * Returns the attribute name, and the value in *value.
8338 */
8339
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008340const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008341xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008342 const xmlChar *name;
8343 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008344
8345 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008346 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008347 name = xmlParseName(ctxt);
8348 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008349 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008350 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008351 return(NULL);
8352 }
8353
8354 /*
8355 * read the value
8356 */
8357 SKIP_BLANKS;
8358 if (RAW == '=') {
8359 NEXT;
8360 SKIP_BLANKS;
8361 val = xmlParseAttValue(ctxt);
8362 ctxt->instate = XML_PARSER_CONTENT;
8363 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008364 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02008365 "Specification mandates value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008366 return(NULL);
8367 }
8368
8369 /*
8370 * Check that xml:lang conforms to the specification
8371 * No more registered as an error, just generate a warning now
8372 * since this was deprecated in XML second edition
8373 */
8374 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8375 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008376 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8377 "Malformed value for xml:lang : %s\n",
8378 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008379 }
8380 }
8381
8382 /*
8383 * Check that xml:space conforms to the specification
8384 */
8385 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8386 if (xmlStrEqual(val, BAD_CAST "default"))
8387 *(ctxt->space) = 0;
8388 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8389 *(ctxt->space) = 1;
8390 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008391 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008392"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008393 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008394 }
8395 }
8396
8397 *value = val;
8398 return(name);
8399}
8400
8401/**
8402 * xmlParseStartTag:
8403 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008404 *
Owen Taylor3473f882001-02-23 17:55:21 +00008405 * parse a start of tag either for rule element or
8406 * EmptyElement. In both case we don't parse the tag closing chars.
8407 *
8408 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8409 *
8410 * [ WFC: Unique Att Spec ]
8411 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008412 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008413 *
8414 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8415 *
8416 * [ WFC: Unique Att Spec ]
8417 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008418 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008419 *
8420 * With namespace:
8421 *
8422 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8423 *
8424 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8425 *
8426 * Returns the element name parsed
8427 */
8428
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008429const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008430xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008431 const xmlChar *name;
8432 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008433 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008434 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008435 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008436 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008437 int i;
8438
8439 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008440 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008441
8442 name = xmlParseName(ctxt);
8443 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008444 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008445 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008446 return(NULL);
8447 }
8448
8449 /*
8450 * Now parse the attributes, it ends up with the ending
8451 *
8452 * (S Attribute)* S?
8453 */
8454 SKIP_BLANKS;
8455 GROW;
8456
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008457 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008458 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008459 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008460 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008461 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008462
8463 attname = xmlParseAttribute(ctxt, &attvalue);
8464 if ((attname != NULL) && (attvalue != NULL)) {
8465 /*
8466 * [ WFC: Unique Att Spec ]
8467 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008468 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008469 */
8470 for (i = 0; i < nbatts;i += 2) {
8471 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008472 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008473 xmlFree(attvalue);
8474 goto failed;
8475 }
8476 }
Owen Taylor3473f882001-02-23 17:55:21 +00008477 /*
8478 * Add the pair to atts
8479 */
8480 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008481 maxatts = 22; /* allow for 10 attrs by default */
8482 atts = (const xmlChar **)
8483 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008484 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008485 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008486 if (attvalue != NULL)
8487 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008488 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008489 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008490 ctxt->atts = atts;
8491 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008492 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008493 const xmlChar **n;
8494
Owen Taylor3473f882001-02-23 17:55:21 +00008495 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008496 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008497 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008498 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008499 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008500 if (attvalue != NULL)
8501 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008502 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008503 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008504 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008505 ctxt->atts = atts;
8506 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008507 }
8508 atts[nbatts++] = attname;
8509 atts[nbatts++] = attvalue;
8510 atts[nbatts] = NULL;
8511 atts[nbatts + 1] = NULL;
8512 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008513 if (attvalue != NULL)
8514 xmlFree(attvalue);
8515 }
8516
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008517failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008518
Daniel Veillard3772de32002-12-17 10:31:45 +00008519 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008520 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8521 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02008522 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008523 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8524 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008525 }
Daniel Veillard02111c12003-02-24 19:14:52 +00008526 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8527 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008528 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8529 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008530 break;
8531 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008532 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008533 GROW;
8534 }
8535
8536 /*
8537 * SAX: Start of Element !
8538 */
8539 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008540 (!ctxt->disableSAX)) {
8541 if (nbatts > 0)
8542 ctxt->sax->startElement(ctxt->userData, name, atts);
8543 else
8544 ctxt->sax->startElement(ctxt->userData, name, NULL);
8545 }
Owen Taylor3473f882001-02-23 17:55:21 +00008546
8547 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008548 /* Free only the content strings */
8549 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008550 if (atts[i] != NULL)
8551 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008552 }
8553 return(name);
8554}
8555
8556/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008557 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008558 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008559 * @line: line of the start tag
8560 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008561 *
8562 * parse an end of tag
8563 *
8564 * [42] ETag ::= '</' Name S? '>'
8565 *
8566 * With namespace
8567 *
8568 * [NS 9] ETag ::= '</' QName S? '>'
8569 */
8570
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008571static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008572xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008573 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008574
8575 GROW;
8576 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008577 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008578 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008579 return;
8580 }
8581 SKIP(2);
8582
Daniel Veillard46de64e2002-05-29 08:21:33 +00008583 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008584
8585 /*
8586 * We should definitely be at the ending "S? '>'" part
8587 */
8588 GROW;
8589 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008590 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008591 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008592 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008593 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008594
8595 /*
8596 * [ WFC: Element Type Match ]
8597 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008598 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008599 *
8600 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008601 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008602 if (name == NULL) name = BAD_CAST "unparseable";
8603 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008604 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008605 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008606 }
8607
8608 /*
8609 * SAX: End of Tag
8610 */
8611 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8612 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008613 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008614
Daniel Veillarde57ec792003-09-10 10:50:59 +00008615 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008616 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008617 return;
8618}
8619
8620/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008621 * xmlParseEndTag:
8622 * @ctxt: an XML parser context
8623 *
8624 * parse an end of tag
8625 *
8626 * [42] ETag ::= '</' Name S? '>'
8627 *
8628 * With namespace
8629 *
8630 * [NS 9] ETag ::= '</' QName S? '>'
8631 */
8632
8633void
8634xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008635 xmlParseEndTag1(ctxt, 0);
8636}
Daniel Veillard81273902003-09-30 00:43:48 +00008637#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008638
8639/************************************************************************
8640 * *
8641 * SAX 2 specific operations *
8642 * *
8643 ************************************************************************/
8644
Daniel Veillard0fb18932003-09-07 09:14:37 +00008645/*
8646 * xmlGetNamespace:
8647 * @ctxt: an XML parser context
8648 * @prefix: the prefix to lookup
8649 *
8650 * Lookup the namespace name for the @prefix (which ca be NULL)
Jan Pokornýbb654fe2016-04-13 16:56:07 +02008651 * The prefix must come from the @ctxt->dict dictionary
Daniel Veillard0fb18932003-09-07 09:14:37 +00008652 *
8653 * Returns the namespace name or NULL if not bound
8654 */
8655static const xmlChar *
8656xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8657 int i;
8658
Daniel Veillarde57ec792003-09-10 10:50:59 +00008659 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008660 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008661 if (ctxt->nsTab[i] == prefix) {
8662 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8663 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008664 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008665 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008666 return(NULL);
8667}
8668
8669/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008670 * xmlParseQName:
8671 * @ctxt: an XML parser context
8672 * @prefix: pointer to store the prefix part
8673 *
8674 * parse an XML Namespace QName
8675 *
8676 * [6] QName ::= (Prefix ':')? LocalPart
8677 * [7] Prefix ::= NCName
8678 * [8] LocalPart ::= NCName
8679 *
8680 * Returns the Name parsed or NULL
8681 */
8682
8683static const xmlChar *
8684xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8685 const xmlChar *l, *p;
8686
8687 GROW;
8688
8689 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008690 if (l == NULL) {
8691 if (CUR == ':') {
8692 l = xmlParseName(ctxt);
8693 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008694 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008695 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008696 *prefix = NULL;
8697 return(l);
8698 }
8699 }
8700 return(NULL);
8701 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008702 if (CUR == ':') {
8703 NEXT;
8704 p = l;
8705 l = xmlParseNCName(ctxt);
8706 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008707 xmlChar *tmp;
8708
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008709 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8710 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008711 l = xmlParseNmtoken(ctxt);
8712 if (l == NULL)
8713 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8714 else {
8715 tmp = xmlBuildQName(l, p, NULL, 0);
8716 xmlFree((char *)l);
8717 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008718 p = xmlDictLookup(ctxt->dict, tmp, -1);
8719 if (tmp != NULL) xmlFree(tmp);
8720 *prefix = NULL;
8721 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008722 }
8723 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008724 xmlChar *tmp;
8725
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008726 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8727 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008728 NEXT;
8729 tmp = (xmlChar *) xmlParseName(ctxt);
8730 if (tmp != NULL) {
8731 tmp = xmlBuildQName(tmp, l, NULL, 0);
8732 l = xmlDictLookup(ctxt->dict, tmp, -1);
8733 if (tmp != NULL) xmlFree(tmp);
8734 *prefix = p;
8735 return(l);
8736 }
8737 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8738 l = xmlDictLookup(ctxt->dict, tmp, -1);
8739 if (tmp != NULL) xmlFree(tmp);
8740 *prefix = p;
8741 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008742 }
8743 *prefix = p;
8744 } else
8745 *prefix = NULL;
8746 return(l);
8747}
8748
8749/**
8750 * xmlParseQNameAndCompare:
8751 * @ctxt: an XML parser context
8752 * @name: the localname
8753 * @prefix: the prefix, if any.
8754 *
8755 * parse an XML name and compares for match
8756 * (specialized for endtag parsing)
8757 *
8758 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8759 * and the name for mismatch
8760 */
8761
8762static const xmlChar *
8763xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8764 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008765 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008766 const xmlChar *in;
8767 const xmlChar *ret;
8768 const xmlChar *prefix2;
8769
8770 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8771
8772 GROW;
8773 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008774
Daniel Veillard0fb18932003-09-07 09:14:37 +00008775 cmp = prefix;
8776 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008777 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008778 ++cmp;
8779 }
8780 if ((*cmp == 0) && (*in == ':')) {
8781 in++;
8782 cmp = name;
8783 while (*in != 0 && *in == *cmp) {
8784 ++in;
8785 ++cmp;
8786 }
William M. Brack76e95df2003-10-18 16:20:14 +00008787 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008788 /* success */
8789 ctxt->input->cur = in;
8790 return((const xmlChar*) 1);
8791 }
8792 }
8793 /*
8794 * all strings coms from the dictionary, equality can be done directly
8795 */
8796 ret = xmlParseQName (ctxt, &prefix2);
8797 if ((ret == name) && (prefix == prefix2))
8798 return((const xmlChar*) 1);
8799 return ret;
8800}
8801
8802/**
8803 * xmlParseAttValueInternal:
8804 * @ctxt: an XML parser context
8805 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008806 * @alloc: whether the attribute was reallocated as a new string
8807 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008808 *
8809 * parse a value for an attribute.
8810 * NOTE: if no normalization is needed, the routine will return pointers
8811 * directly from the data buffer.
8812 *
8813 * 3.3.3 Attribute-Value Normalization:
8814 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008815 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008816 * - a character reference is processed by appending the referenced
8817 * character to the attribute value
8818 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008819 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008820 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8821 * appending #x20 to the normalized value, except that only a single
8822 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008823 * parsed entity or the literal entity value of an internal parsed entity
8824 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008825 * If the declared value is not CDATA, then the XML processor must further
8826 * process the normalized attribute value by discarding any leading and
8827 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008828 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008829 * All attributes for which no declaration has been read should be treated
8830 * by a non-validating parser as if declared CDATA.
8831 *
8832 * Returns the AttValue parsed or NULL. The value has to be freed by the
8833 * caller if it was copied, this can be detected by val[*len] == 0.
8834 */
8835
8836static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008837xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8838 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008839{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008840 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008841 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008842 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08008843 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008844
8845 GROW;
8846 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08008847 line = ctxt->input->line;
8848 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008849 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008850 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008851 return (NULL);
8852 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008853 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008854
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008855 /*
8856 * try to handle in this routine the most common case where no
8857 * allocation of a new string is required and where content is
8858 * pure ASCII.
8859 */
8860 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008861 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008862 end = ctxt->input->end;
8863 start = in;
8864 if (in >= end) {
8865 const xmlChar *oldbase = ctxt->input->base;
8866 GROW;
8867 if (oldbase != ctxt->input->base) {
8868 long delta = ctxt->input->base - oldbase;
8869 start = start + delta;
8870 in = in + delta;
8871 }
8872 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008873 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008874 if (normalize) {
8875 /*
8876 * Skip any leading spaces
8877 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008878 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008879 ((*in == 0x20) || (*in == 0x9) ||
8880 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008881 if (*in == 0xA) {
8882 line++; col = 1;
8883 } else {
8884 col++;
8885 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008886 in++;
8887 start = in;
8888 if (in >= end) {
8889 const xmlChar *oldbase = ctxt->input->base;
8890 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008891 if (ctxt->instate == XML_PARSER_EOF)
8892 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008893 if (oldbase != ctxt->input->base) {
8894 long delta = ctxt->input->base - oldbase;
8895 start = start + delta;
8896 in = in + delta;
8897 }
8898 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008899 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8900 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8901 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008902 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008903 return(NULL);
8904 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008905 }
8906 }
8907 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8908 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008909 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008910 if ((*in++ == 0x20) && (*in == 0x20)) break;
8911 if (in >= end) {
8912 const xmlChar *oldbase = ctxt->input->base;
8913 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008914 if (ctxt->instate == XML_PARSER_EOF)
8915 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008916 if (oldbase != ctxt->input->base) {
8917 long delta = ctxt->input->base - oldbase;
8918 start = start + delta;
8919 in = in + delta;
8920 }
8921 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008922 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8923 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8924 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008925 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008926 return(NULL);
8927 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008928 }
8929 }
8930 last = in;
8931 /*
8932 * skip the trailing blanks
8933 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008934 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008935 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008936 ((*in == 0x20) || (*in == 0x9) ||
8937 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008938 if (*in == 0xA) {
8939 line++, col = 1;
8940 } else {
8941 col++;
8942 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008943 in++;
8944 if (in >= end) {
8945 const xmlChar *oldbase = ctxt->input->base;
8946 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008947 if (ctxt->instate == XML_PARSER_EOF)
8948 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008949 if (oldbase != ctxt->input->base) {
8950 long delta = ctxt->input->base - oldbase;
8951 start = start + delta;
8952 in = in + delta;
8953 last = last + delta;
8954 }
8955 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008956 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8957 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8958 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008959 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008960 return(NULL);
8961 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008962 }
8963 }
Daniel Veillarde17db992012-07-19 11:25:16 +08008964 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8965 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8966 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008967 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008968 return(NULL);
8969 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008970 if (*in != limit) goto need_complex;
8971 } else {
8972 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8973 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8974 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008975 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008976 if (in >= end) {
8977 const xmlChar *oldbase = ctxt->input->base;
8978 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008979 if (ctxt->instate == XML_PARSER_EOF)
8980 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008981 if (oldbase != ctxt->input->base) {
8982 long delta = ctxt->input->base - oldbase;
8983 start = start + delta;
8984 in = in + delta;
8985 }
8986 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008987 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8988 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8989 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008990 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008991 return(NULL);
8992 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008993 }
8994 }
8995 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08008996 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8997 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8998 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008999 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009000 return(NULL);
9001 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009002 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009003 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009004 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009005 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009006 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009007 *len = last - start;
9008 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009009 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009010 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009011 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009012 }
9013 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009014 ctxt->input->line = line;
9015 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009016 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009017 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009018need_complex:
9019 if (alloc) *alloc = 1;
9020 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009021}
9022
9023/**
9024 * xmlParseAttribute2:
9025 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009026 * @pref: the element prefix
9027 * @elem: the element name
9028 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009029 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009030 * @len: an int * to save the length of the attribute
9031 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009032 *
9033 * parse an attribute in the new SAX2 framework.
9034 *
9035 * Returns the attribute name, and the value in *value, .
9036 */
9037
9038static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009039xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009040 const xmlChar * pref, const xmlChar * elem,
9041 const xmlChar ** prefix, xmlChar ** value,
9042 int *len, int *alloc)
9043{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009044 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009045 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009046 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009047
9048 *value = NULL;
9049 GROW;
9050 name = xmlParseQName(ctxt, prefix);
9051 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009052 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9053 "error parsing attribute name\n");
9054 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009055 }
9056
9057 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009058 * get the type if needed
9059 */
9060 if (ctxt->attsSpecial != NULL) {
9061 int type;
9062
9063 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009064 pref, elem, *prefix, name);
9065 if (type != 0)
9066 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009067 }
9068
9069 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009070 * read the value
9071 */
9072 SKIP_BLANKS;
9073 if (RAW == '=') {
9074 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009075 SKIP_BLANKS;
9076 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9077 if (normalize) {
9078 /*
9079 * Sometimes a second normalisation pass for spaces is needed
9080 * but that only happens if charrefs or entities refernces
9081 * have been used in the attribute value, i.e. the attribute
9082 * value have been extracted in an allocated string already.
9083 */
9084 if (*alloc) {
9085 const xmlChar *val2;
9086
9087 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009088 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009089 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009090 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009091 }
9092 }
9093 }
9094 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009095 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009096 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02009097 "Specification mandates value for attribute %s\n",
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009098 name);
9099 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009100 }
9101
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009102 if (*prefix == ctxt->str_xml) {
9103 /*
9104 * Check that xml:lang conforms to the specification
9105 * No more registered as an error, just generate a warning now
9106 * since this was deprecated in XML second edition
9107 */
9108 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9109 internal_val = xmlStrndup(val, *len);
9110 if (!xmlCheckLanguageID(internal_val)) {
9111 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9112 "Malformed value for xml:lang : %s\n",
9113 internal_val, NULL);
9114 }
9115 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009116
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009117 /*
9118 * Check that xml:space conforms to the specification
9119 */
9120 if (xmlStrEqual(name, BAD_CAST "space")) {
9121 internal_val = xmlStrndup(val, *len);
9122 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9123 *(ctxt->space) = 0;
9124 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9125 *(ctxt->space) = 1;
9126 else {
9127 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9128 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9129 internal_val, NULL);
9130 }
9131 }
9132 if (internal_val) {
9133 xmlFree(internal_val);
9134 }
9135 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009136
9137 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009138 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009139}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009140/**
9141 * xmlParseStartTag2:
9142 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009143 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009144 * parse a start of tag either for rule element or
9145 * EmptyElement. In both case we don't parse the tag closing chars.
9146 * This routine is called when running SAX2 parsing
9147 *
9148 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9149 *
9150 * [ WFC: Unique Att Spec ]
9151 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009152 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009153 *
9154 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9155 *
9156 * [ WFC: Unique Att Spec ]
9157 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009158 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009159 *
9160 * With namespace:
9161 *
9162 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9163 *
9164 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9165 *
9166 * Returns the element name parsed
9167 */
9168
9169static const xmlChar *
9170xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009171 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009172 const xmlChar *localname;
9173 const xmlChar *prefix;
9174 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009175 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009176 const xmlChar *nsname;
9177 xmlChar *attvalue;
9178 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009179 int maxatts = ctxt->maxatts;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009180 int nratts, nbatts, nbdef, inputid;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009181 int i, j, nbNs, attval;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009182 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009183 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009184
9185 if (RAW != '<') return(NULL);
9186 NEXT1;
9187
9188 /*
9189 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9190 * point since the attribute values may be stored as pointers to
9191 * the buffer and calling SHRINK would destroy them !
9192 * The Shrinking is only possible once the full set of attribute
9193 * callbacks have been done.
9194 */
9195 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009196 cur = ctxt->input->cur - ctxt->input->base;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009197 inputid = ctxt->input->id;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009198 nbatts = 0;
9199 nratts = 0;
9200 nbdef = 0;
9201 nbNs = 0;
9202 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009203 /* Forget any namespaces added during an earlier parse of this element. */
9204 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009205
9206 localname = xmlParseQName(ctxt, &prefix);
9207 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009208 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9209 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009210 return(NULL);
9211 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009212 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009213
9214 /*
9215 * Now parse the attributes, it ends up with the ending
9216 *
9217 * (S Attribute)* S?
9218 */
9219 SKIP_BLANKS;
9220 GROW;
9221
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009222 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009223 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009224 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009225 const xmlChar *q = CUR_PTR;
9226 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009227 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009228
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009229 attname = xmlParseAttribute2(ctxt, prefix, localname,
9230 &aprefix, &attvalue, &len, &alloc);
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009231 if ((attname == NULL) || (attvalue == NULL))
9232 goto next_attr;
9233 if (len < 0) len = xmlStrlen(attvalue);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009234
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009235 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9236 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9237 xmlURIPtr uri;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009238
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009239 if (URL == NULL) {
9240 xmlErrMemory(ctxt, "dictionary allocation failure");
9241 if ((attvalue != NULL) && (alloc != 0))
9242 xmlFree(attvalue);
9243 return(NULL);
9244 }
9245 if (*URL != 0) {
9246 uri = xmlParseURI((const char *) URL);
9247 if (uri == NULL) {
9248 xmlNsErr(ctxt, XML_WAR_NS_URI,
9249 "xmlns: '%s' is not a valid URI\n",
9250 URL, NULL, NULL);
9251 } else {
9252 if (uri->scheme == NULL) {
9253 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9254 "xmlns: URI %s is not absolute\n",
9255 URL, NULL, NULL);
9256 }
9257 xmlFreeURI(uri);
9258 }
Daniel Veillard37334572008-07-31 08:20:02 +00009259 if (URL == ctxt->str_xml_ns) {
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009260 if (attname != ctxt->str_xml) {
9261 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9262 "xml namespace URI cannot be the default namespace\n",
9263 NULL, NULL, NULL);
9264 }
9265 goto next_attr;
9266 }
9267 if ((len == 29) &&
9268 (xmlStrEqual(URL,
9269 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9270 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9271 "reuse of the xmlns namespace name is forbidden\n",
9272 NULL, NULL, NULL);
9273 goto next_attr;
9274 }
9275 }
9276 /*
9277 * check that it's not a defined namespace
9278 */
9279 for (j = 1;j <= nbNs;j++)
9280 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9281 break;
9282 if (j <= nbNs)
9283 xmlErrAttributeDup(ctxt, NULL, attname);
9284 else
9285 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009286
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009287 } else if (aprefix == ctxt->str_xmlns) {
9288 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9289 xmlURIPtr uri;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009290
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009291 if (attname == ctxt->str_xml) {
9292 if (URL != ctxt->str_xml_ns) {
9293 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9294 "xml namespace prefix mapped to wrong URI\n",
9295 NULL, NULL, NULL);
9296 }
9297 /*
9298 * Do not keep a namespace definition node
9299 */
9300 goto next_attr;
9301 }
9302 if (URL == ctxt->str_xml_ns) {
9303 if (attname != ctxt->str_xml) {
9304 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9305 "xml namespace URI mapped to wrong prefix\n",
9306 NULL, NULL, NULL);
9307 }
9308 goto next_attr;
9309 }
9310 if (attname == ctxt->str_xmlns) {
9311 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9312 "redefinition of the xmlns prefix is forbidden\n",
9313 NULL, NULL, NULL);
9314 goto next_attr;
9315 }
9316 if ((len == 29) &&
9317 (xmlStrEqual(URL,
9318 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9319 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9320 "reuse of the xmlns namespace name is forbidden\n",
9321 NULL, NULL, NULL);
9322 goto next_attr;
9323 }
9324 if ((URL == NULL) || (URL[0] == 0)) {
9325 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9326 "xmlns:%s: Empty XML namespace is not allowed\n",
9327 attname, NULL, NULL);
9328 goto next_attr;
9329 } else {
9330 uri = xmlParseURI((const char *) URL);
9331 if (uri == NULL) {
9332 xmlNsErr(ctxt, XML_WAR_NS_URI,
9333 "xmlns:%s: '%s' is not a valid URI\n",
9334 attname, URL, NULL);
9335 } else {
9336 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9337 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9338 "xmlns:%s: URI %s is not absolute\n",
9339 attname, URL, NULL);
9340 }
9341 xmlFreeURI(uri);
9342 }
9343 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009344
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009345 /*
9346 * check that it's not a defined namespace
9347 */
9348 for (j = 1;j <= nbNs;j++)
9349 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9350 break;
9351 if (j <= nbNs)
9352 xmlErrAttributeDup(ctxt, aprefix, attname);
9353 else
9354 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9355
9356 } else {
9357 /*
9358 * Add the pair to atts
9359 */
9360 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9361 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9362 goto next_attr;
9363 }
9364 maxatts = ctxt->maxatts;
9365 atts = ctxt->atts;
9366 }
9367 ctxt->attallocs[nratts++] = alloc;
9368 atts[nbatts++] = attname;
9369 atts[nbatts++] = aprefix;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009370 /*
9371 * The namespace URI field is used temporarily to point at the
9372 * base of the current input buffer for non-alloced attributes.
9373 * When the input buffer is reallocated, all the pointers become
9374 * invalid, but they can be reconstructed later.
9375 */
9376 if (alloc)
9377 atts[nbatts++] = NULL;
9378 else
9379 atts[nbatts++] = ctxt->input->base;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009380 atts[nbatts++] = attvalue;
9381 attvalue += len;
9382 atts[nbatts++] = attvalue;
9383 /*
9384 * tag if some deallocation is needed
9385 */
9386 if (alloc != 0) attval = 1;
9387 attvalue = NULL; /* moved into atts */
9388 }
9389
9390next_attr:
9391 if ((attvalue != NULL) && (alloc != 0)) {
9392 xmlFree(attvalue);
9393 attvalue = NULL;
9394 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009395
9396 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009397 if (ctxt->instate == XML_PARSER_EOF)
9398 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009399 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9400 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02009401 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009402 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9403 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009404 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009405 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009406 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9407 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009408 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009409 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009410 break;
9411 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009412 GROW;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009413 }
9414
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009415 if (ctxt->input->id != inputid) {
9416 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9417 "Unexpected change of input\n");
9418 localname = NULL;
9419 goto done;
9420 }
9421
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009422 /* Reconstruct attribute value pointers. */
9423 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9424 if (atts[i+2] != NULL) {
9425 /*
9426 * Arithmetic on dangling pointers is technically undefined
9427 * behavior, but well...
9428 */
9429 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9430 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9431 atts[i+3] += offset; /* value */
9432 atts[i+4] += offset; /* valuend */
9433 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009434 }
9435
Daniel Veillard0fb18932003-09-07 09:14:37 +00009436 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009437 * The attributes defaulting
9438 */
9439 if (ctxt->attsDefault != NULL) {
9440 xmlDefAttrsPtr defaults;
9441
9442 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9443 if (defaults != NULL) {
9444 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009445 attname = defaults->values[5 * i];
9446 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009447
9448 /*
9449 * special work for namespaces defaulted defs
9450 */
9451 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9452 /*
9453 * check that it's not a defined namespace
9454 */
9455 for (j = 1;j <= nbNs;j++)
9456 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9457 break;
9458 if (j <= nbNs) continue;
9459
9460 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009461 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009462 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009463 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009464 nbNs++;
9465 }
9466 } else if (aprefix == ctxt->str_xmlns) {
9467 /*
9468 * check that it's not a defined namespace
9469 */
9470 for (j = 1;j <= nbNs;j++)
9471 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9472 break;
9473 if (j <= nbNs) continue;
9474
9475 nsname = xmlGetNamespace(ctxt, attname);
9476 if (nsname != defaults->values[2]) {
9477 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009478 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009479 nbNs++;
9480 }
9481 } else {
9482 /*
9483 * check that it's not a defined attribute
9484 */
9485 for (j = 0;j < nbatts;j+=5) {
9486 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9487 break;
9488 }
9489 if (j < nbatts) continue;
9490
9491 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9492 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009493 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009494 }
9495 maxatts = ctxt->maxatts;
9496 atts = ctxt->atts;
9497 }
9498 atts[nbatts++] = attname;
9499 atts[nbatts++] = aprefix;
9500 if (aprefix == NULL)
9501 atts[nbatts++] = NULL;
9502 else
9503 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009504 atts[nbatts++] = defaults->values[5 * i + 2];
9505 atts[nbatts++] = defaults->values[5 * i + 3];
9506 if ((ctxt->standalone == 1) &&
9507 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009508 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009509 "standalone: attribute %s on %s defaulted from external subset\n",
9510 attname, localname);
9511 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009512 nbdef++;
9513 }
9514 }
9515 }
9516 }
9517
Daniel Veillarde70c8772003-11-25 07:21:18 +00009518 /*
9519 * The attributes checkings
9520 */
9521 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009522 /*
9523 * The default namespace does not apply to attribute names.
9524 */
9525 if (atts[i + 1] != NULL) {
9526 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9527 if (nsname == NULL) {
9528 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9529 "Namespace prefix %s for %s on %s is not defined\n",
9530 atts[i + 1], atts[i], localname);
9531 }
9532 atts[i + 2] = nsname;
9533 } else
9534 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009535 /*
9536 * [ WFC: Unique Att Spec ]
9537 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009538 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009539 * As extended by the Namespace in XML REC.
9540 */
9541 for (j = 0; j < i;j += 5) {
9542 if (atts[i] == atts[j]) {
9543 if (atts[i+1] == atts[j+1]) {
9544 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9545 break;
9546 }
9547 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9548 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9549 "Namespaced Attribute %s in '%s' redefined\n",
9550 atts[i], nsname, NULL);
9551 break;
9552 }
9553 }
9554 }
9555 }
9556
Daniel Veillarde57ec792003-09-10 10:50:59 +00009557 nsname = xmlGetNamespace(ctxt, prefix);
9558 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009559 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9560 "Namespace prefix %s on %s is not defined\n",
9561 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009562 }
9563 *pref = prefix;
9564 *URI = nsname;
9565
9566 /*
9567 * SAX: Start of Element !
9568 */
9569 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9570 (!ctxt->disableSAX)) {
9571 if (nbNs > 0)
9572 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9573 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9574 nbatts / 5, nbdef, atts);
9575 else
9576 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9577 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9578 }
9579
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009580done:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009581 /*
9582 * Free up attribute allocated strings if needed
9583 */
9584 if (attval != 0) {
9585 for (i = 3,j = 0; j < nratts;i += 5,j++)
9586 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9587 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009588 }
9589
9590 return(localname);
9591}
9592
9593/**
9594 * xmlParseEndTag2:
9595 * @ctxt: an XML parser context
9596 * @line: line of the start tag
9597 * @nsNr: number of namespaces on the start tag
9598 *
9599 * parse an end of tag
9600 *
9601 * [42] ETag ::= '</' Name S? '>'
9602 *
9603 * With namespace
9604 *
9605 * [NS 9] ETag ::= '</' QName S? '>'
9606 */
9607
9608static void
9609xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009610 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009611 const xmlChar *name;
David Kilzerdb07dd62016-02-12 09:58:29 -08009612 size_t curLength;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009613
9614 GROW;
9615 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009616 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009617 return;
9618 }
9619 SKIP(2);
9620
David Kilzerdb07dd62016-02-12 09:58:29 -08009621 curLength = ctxt->input->end - ctxt->input->cur;
9622 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9623 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9624 if ((curLength >= (size_t)(tlen + 1)) &&
9625 (ctxt->input->cur[tlen] == '>')) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009626 ctxt->input->cur += tlen + 1;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009627 ctxt->input->col += tlen + 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009628 goto done;
9629 }
9630 ctxt->input->cur += tlen;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009631 ctxt->input->col += tlen;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009632 name = (xmlChar*)1;
9633 } else {
9634 if (prefix == NULL)
9635 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9636 else
9637 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9638 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009639
9640 /*
9641 * We should definitely be at the ending "S? '>'" part
9642 */
9643 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009644 if (ctxt->instate == XML_PARSER_EOF)
9645 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009646 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009647 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009648 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009649 } else
9650 NEXT1;
9651
9652 /*
9653 * [ WFC: Element Type Match ]
9654 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009655 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009656 *
9657 */
9658 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009659 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009660 if ((line == 0) && (ctxt->node != NULL))
9661 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009662 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009663 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009664 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009665 }
9666
9667 /*
9668 * SAX: End of Tag
9669 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009670done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009671 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9672 (!ctxt->disableSAX))
9673 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9674
Daniel Veillard0fb18932003-09-07 09:14:37 +00009675 spacePop(ctxt);
9676 if (nsNr != 0)
9677 nsPop(ctxt, nsNr);
9678 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009679}
9680
9681/**
Owen Taylor3473f882001-02-23 17:55:21 +00009682 * xmlParseCDSect:
9683 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009684 *
Owen Taylor3473f882001-02-23 17:55:21 +00009685 * Parse escaped pure raw content.
9686 *
9687 * [18] CDSect ::= CDStart CData CDEnd
9688 *
9689 * [19] CDStart ::= '<![CDATA['
9690 *
9691 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9692 *
9693 * [21] CDEnd ::= ']]>'
9694 */
9695void
9696xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9697 xmlChar *buf = NULL;
9698 int len = 0;
9699 int size = XML_PARSER_BUFFER_SIZE;
9700 int r, rl;
9701 int s, sl;
9702 int cur, l;
9703 int count = 0;
9704
Daniel Veillard8f597c32003-10-06 08:19:27 +00009705 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009706 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009707 SKIP(9);
9708 } else
9709 return;
9710
9711 ctxt->instate = XML_PARSER_CDATA_SECTION;
9712 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009713 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009714 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009715 ctxt->instate = XML_PARSER_CONTENT;
9716 return;
9717 }
9718 NEXTL(rl);
9719 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009720 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009721 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009722 ctxt->instate = XML_PARSER_CONTENT;
9723 return;
9724 }
9725 NEXTL(sl);
9726 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009727 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009728 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009729 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009730 return;
9731 }
William M. Brack871611b2003-10-18 04:53:14 +00009732 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009733 ((r != ']') || (s != ']') || (cur != '>'))) {
9734 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009735 xmlChar *tmp;
9736
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009737 if ((size > XML_MAX_TEXT_LENGTH) &&
9738 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9739 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9740 "CData section too big found", NULL);
9741 xmlFree (buf);
9742 return;
9743 }
9744 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009745 if (tmp == NULL) {
9746 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009747 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009748 return;
9749 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009750 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009751 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009752 }
9753 COPY_BUF(rl,buf,len,r);
9754 r = s;
9755 rl = sl;
9756 s = cur;
9757 sl = l;
9758 count++;
9759 if (count > 50) {
9760 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009761 if (ctxt->instate == XML_PARSER_EOF) {
9762 xmlFree(buf);
9763 return;
9764 }
Owen Taylor3473f882001-02-23 17:55:21 +00009765 count = 0;
9766 }
9767 NEXTL(l);
9768 cur = CUR_CHAR(l);
9769 }
9770 buf[len] = 0;
9771 ctxt->instate = XML_PARSER_CONTENT;
9772 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009773 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009774 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009775 xmlFree(buf);
9776 return;
9777 }
9778 NEXTL(l);
9779
9780 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009781 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009782 */
9783 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9784 if (ctxt->sax->cdataBlock != NULL)
9785 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009786 else if (ctxt->sax->characters != NULL)
9787 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009788 }
9789 xmlFree(buf);
9790}
9791
9792/**
9793 * xmlParseContent:
9794 * @ctxt: an XML parser context
9795 *
9796 * Parse a content:
9797 *
9798 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9799 */
9800
9801void
9802xmlParseContent(xmlParserCtxtPtr ctxt) {
9803 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009804 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009805 ((RAW != '<') || (NXT(1) != '/')) &&
9806 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009807 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009808 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009809 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009810
9811 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009812 * First case : a Processing Instruction.
9813 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009814 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009815 xmlParsePI(ctxt);
9816 }
9817
9818 /*
9819 * Second case : a CDSection
9820 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009821 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009822 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009823 xmlParseCDSect(ctxt);
9824 }
9825
9826 /*
9827 * Third case : a comment
9828 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009829 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009830 (NXT(2) == '-') && (NXT(3) == '-')) {
9831 xmlParseComment(ctxt);
9832 ctxt->instate = XML_PARSER_CONTENT;
9833 }
9834
9835 /*
9836 * Fourth case : a sub-element.
9837 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009838 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009839 xmlParseElement(ctxt);
9840 }
9841
9842 /*
9843 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009844 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009845 */
9846
Daniel Veillard21a0f912001-02-25 19:54:14 +00009847 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009848 xmlParseReference(ctxt);
9849 }
9850
9851 /*
9852 * Last case, text. Note that References are handled directly.
9853 */
9854 else {
9855 xmlParseCharData(ctxt, 0);
9856 }
9857
9858 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00009859 SHRINK;
9860
Daniel Veillardfdc91562002-07-01 21:52:03 +00009861 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009862 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9863 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +08009864 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009865 break;
9866 }
9867 }
9868}
9869
9870/**
9871 * xmlParseElement:
9872 * @ctxt: an XML parser context
9873 *
9874 * parse an XML element, this is highly recursive
9875 *
9876 * [39] element ::= EmptyElemTag | STag content ETag
9877 *
9878 * [ WFC: Element Type Match ]
9879 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009880 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009881 *
Owen Taylor3473f882001-02-23 17:55:21 +00009882 */
9883
9884void
9885xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009886 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009887 const xmlChar *prefix = NULL;
9888 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009889 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009890 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009891 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009892 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009893
Daniel Veillard8915c152008-08-26 13:05:34 +00009894 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9895 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9896 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9897 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9898 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08009899 xmlHaltParser(ctxt);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009900 return;
9901 }
9902
Owen Taylor3473f882001-02-23 17:55:21 +00009903 /* Capture start position */
9904 if (ctxt->record_info) {
9905 node_info.begin_pos = ctxt->input->consumed +
9906 (CUR_PTR - ctxt->input->base);
9907 node_info.begin_line = ctxt->input->line;
9908 }
9909
9910 if (ctxt->spaceNr == 0)
9911 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009912 else if (*ctxt->space == -2)
9913 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009914 else
9915 spacePush(ctxt, *ctxt->space);
9916
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009917 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009918#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009919 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009920#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009921 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009922#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009923 else
9924 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009925#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009926 if (ctxt->instate == XML_PARSER_EOF)
9927 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009928 if (name == NULL) {
9929 spacePop(ctxt);
9930 return;
9931 }
9932 namePush(ctxt, name);
9933 ret = ctxt->node;
9934
Daniel Veillard4432df22003-09-28 18:58:27 +00009935#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009936 /*
9937 * [ VC: Root Element Type ]
9938 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009939 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00009940 */
9941 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9942 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9943 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009944#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009945
9946 /*
9947 * Check for an Empty Element.
9948 */
9949 if ((RAW == '/') && (NXT(1) == '>')) {
9950 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009951 if (ctxt->sax2) {
9952 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9953 (!ctxt->disableSAX))
9954 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009955#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009956 } else {
9957 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9958 (!ctxt->disableSAX))
9959 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009960#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009961 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009962 namePop(ctxt);
9963 spacePop(ctxt);
9964 if (nsNr != ctxt->nsNr)
9965 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009966 if ( ret != NULL && ctxt->record_info ) {
9967 node_info.end_pos = ctxt->input->consumed +
9968 (CUR_PTR - ctxt->input->base);
9969 node_info.end_line = ctxt->input->line;
9970 node_info.node = ret;
9971 xmlParserAddNodeInfo(ctxt, &node_info);
9972 }
9973 return;
9974 }
9975 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009976 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009977 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009978 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9979 "Couldn't find end of Start Tag %s line %d\n",
9980 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009981
9982 /*
9983 * end of parsing of this node.
9984 */
9985 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009986 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009987 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009988 if (nsNr != ctxt->nsNr)
9989 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009990
9991 /*
9992 * Capture end position and add node
9993 */
9994 if ( ret != NULL && ctxt->record_info ) {
9995 node_info.end_pos = ctxt->input->consumed +
9996 (CUR_PTR - ctxt->input->base);
9997 node_info.end_line = ctxt->input->line;
9998 node_info.node = ret;
9999 xmlParserAddNodeInfo(ctxt, &node_info);
10000 }
10001 return;
10002 }
10003
10004 /*
10005 * Parse the content of the element:
10006 */
10007 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010008 if (ctxt->instate == XML_PARSER_EOF)
10009 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010010 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010011 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010012 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010013 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010014
10015 /*
10016 * end of parsing of this node.
10017 */
10018 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010019 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010020 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010021 if (nsNr != ctxt->nsNr)
10022 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010023 return;
10024 }
10025
10026 /*
10027 * parse the end of tag: '</' should be here.
10028 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010029 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010030 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010031 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010032 }
10033#ifdef LIBXML_SAX1_ENABLED
10034 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010035 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010036#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010037
10038 /*
10039 * Capture end position and add node
10040 */
10041 if ( ret != NULL && ctxt->record_info ) {
10042 node_info.end_pos = ctxt->input->consumed +
10043 (CUR_PTR - ctxt->input->base);
10044 node_info.end_line = ctxt->input->line;
10045 node_info.node = ret;
10046 xmlParserAddNodeInfo(ctxt, &node_info);
10047 }
10048}
10049
10050/**
10051 * xmlParseVersionNum:
10052 * @ctxt: an XML parser context
10053 *
10054 * parse the XML version value.
10055 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010056 * [26] VersionNum ::= '1.' [0-9]+
10057 *
10058 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010059 *
10060 * Returns the string giving the XML version number, or NULL
10061 */
10062xmlChar *
10063xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10064 xmlChar *buf = NULL;
10065 int len = 0;
10066 int size = 10;
10067 xmlChar cur;
10068
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010069 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010070 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010071 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010072 return(NULL);
10073 }
10074 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010075 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010076 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010077 return(NULL);
10078 }
10079 buf[len++] = cur;
10080 NEXT;
10081 cur=CUR;
10082 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010083 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010084 return(NULL);
10085 }
10086 buf[len++] = cur;
10087 NEXT;
10088 cur=CUR;
10089 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010090 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010091 xmlChar *tmp;
10092
Owen Taylor3473f882001-02-23 17:55:21 +000010093 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010094 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10095 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010096 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010097 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010098 return(NULL);
10099 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010100 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010101 }
10102 buf[len++] = cur;
10103 NEXT;
10104 cur=CUR;
10105 }
10106 buf[len] = 0;
10107 return(buf);
10108}
10109
10110/**
10111 * xmlParseVersionInfo:
10112 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010113 *
Owen Taylor3473f882001-02-23 17:55:21 +000010114 * parse the XML version.
10115 *
10116 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010117 *
Owen Taylor3473f882001-02-23 17:55:21 +000010118 * [25] Eq ::= S? '=' S?
10119 *
10120 * Returns the version string, e.g. "1.0"
10121 */
10122
10123xmlChar *
10124xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10125 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010126
Daniel Veillarda07050d2003-10-19 14:46:32 +000010127 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010128 SKIP(7);
10129 SKIP_BLANKS;
10130 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010131 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010132 return(NULL);
10133 }
10134 NEXT;
10135 SKIP_BLANKS;
10136 if (RAW == '"') {
10137 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010138 version = xmlParseVersionNum(ctxt);
10139 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010140 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010141 } else
10142 NEXT;
10143 } else if (RAW == '\''){
10144 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010145 version = xmlParseVersionNum(ctxt);
10146 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010147 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010148 } else
10149 NEXT;
10150 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010151 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010152 }
10153 }
10154 return(version);
10155}
10156
10157/**
10158 * xmlParseEncName:
10159 * @ctxt: an XML parser context
10160 *
10161 * parse the XML encoding name
10162 *
10163 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10164 *
10165 * Returns the encoding name value or NULL
10166 */
10167xmlChar *
10168xmlParseEncName(xmlParserCtxtPtr ctxt) {
10169 xmlChar *buf = NULL;
10170 int len = 0;
10171 int size = 10;
10172 xmlChar cur;
10173
10174 cur = CUR;
10175 if (((cur >= 'a') && (cur <= 'z')) ||
10176 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010177 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010178 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010179 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010180 return(NULL);
10181 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010182
Owen Taylor3473f882001-02-23 17:55:21 +000010183 buf[len++] = cur;
10184 NEXT;
10185 cur = CUR;
10186 while (((cur >= 'a') && (cur <= 'z')) ||
10187 ((cur >= 'A') && (cur <= 'Z')) ||
10188 ((cur >= '0') && (cur <= '9')) ||
10189 (cur == '.') || (cur == '_') ||
10190 (cur == '-')) {
10191 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010192 xmlChar *tmp;
10193
Owen Taylor3473f882001-02-23 17:55:21 +000010194 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010195 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10196 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010197 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010198 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010199 return(NULL);
10200 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010201 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010202 }
10203 buf[len++] = cur;
10204 NEXT;
10205 cur = CUR;
10206 if (cur == 0) {
10207 SHRINK;
10208 GROW;
10209 cur = CUR;
10210 }
10211 }
10212 buf[len] = 0;
10213 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010214 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010215 }
10216 return(buf);
10217}
10218
10219/**
10220 * xmlParseEncodingDecl:
10221 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010222 *
Owen Taylor3473f882001-02-23 17:55:21 +000010223 * parse the XML encoding declaration
10224 *
10225 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10226 *
10227 * this setups the conversion filters.
10228 *
10229 * Returns the encoding value or NULL
10230 */
10231
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010232const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010233xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10234 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010235
10236 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010237 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010238 SKIP(8);
10239 SKIP_BLANKS;
10240 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010241 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010242 return(NULL);
10243 }
10244 NEXT;
10245 SKIP_BLANKS;
10246 if (RAW == '"') {
10247 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010248 encoding = xmlParseEncName(ctxt);
10249 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010250 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010251 xmlFree((xmlChar *) encoding);
10252 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010253 } else
10254 NEXT;
10255 } else if (RAW == '\''){
10256 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010257 encoding = xmlParseEncName(ctxt);
10258 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010259 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010260 xmlFree((xmlChar *) encoding);
10261 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010262 } else
10263 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010264 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010265 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010266 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010267
10268 /*
10269 * Non standard parsing, allowing the user to ignore encoding
10270 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010271 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10272 xmlFree((xmlChar *) encoding);
10273 return(NULL);
10274 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010275
Daniel Veillard6b621b82003-08-11 15:03:34 +000010276 /*
10277 * UTF-16 encoding stwich has already taken place at this stage,
10278 * more over the little-endian/big-endian selection is already done
10279 */
10280 if ((encoding != NULL) &&
10281 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10282 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010283 /*
10284 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010285 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010286 * document is apparently UTF-8 compatible, then raise an
10287 * encoding mismatch fatal error
10288 */
10289 if ((ctxt->encoding == NULL) &&
10290 (ctxt->input->buf != NULL) &&
10291 (ctxt->input->buf->encoder == NULL)) {
10292 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10293 "Document labelled UTF-16 but has UTF-8 content\n");
10294 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010295 if (ctxt->encoding != NULL)
10296 xmlFree((xmlChar *) ctxt->encoding);
10297 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010298 }
10299 /*
10300 * UTF-8 encoding is handled natively
10301 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010302 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010303 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10304 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010305 if (ctxt->encoding != NULL)
10306 xmlFree((xmlChar *) ctxt->encoding);
10307 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010308 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010309 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010310 xmlCharEncodingHandlerPtr handler;
10311
10312 if (ctxt->input->encoding != NULL)
10313 xmlFree((xmlChar *) ctxt->input->encoding);
10314 ctxt->input->encoding = encoding;
10315
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010316 handler = xmlFindCharEncodingHandler((const char *) encoding);
10317 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010318 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10319 /* failed to convert */
10320 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10321 return(NULL);
10322 }
Owen Taylor3473f882001-02-23 17:55:21 +000010323 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010324 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010325 "Unsupported encoding %s\n", encoding);
10326 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010327 }
10328 }
10329 }
10330 return(encoding);
10331}
10332
10333/**
10334 * xmlParseSDDecl:
10335 * @ctxt: an XML parser context
10336 *
10337 * parse the XML standalone declaration
10338 *
10339 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010340 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010341 *
10342 * [ VC: Standalone Document Declaration ]
10343 * TODO The standalone document declaration must have the value "no"
10344 * if any external markup declarations contain declarations of:
10345 * - attributes with default values, if elements to which these
10346 * attributes apply appear in the document without specifications
10347 * of values for these attributes, or
10348 * - entities (other than amp, lt, gt, apos, quot), if references
10349 * to those entities appear in the document, or
10350 * - attributes with values subject to normalization, where the
10351 * attribute appears in the document with a value which will change
10352 * as a result of normalization, or
10353 * - element types with element content, if white space occurs directly
10354 * within any instance of those types.
10355 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010356 * Returns:
10357 * 1 if standalone="yes"
10358 * 0 if standalone="no"
10359 * -2 if standalone attribute is missing or invalid
10360 * (A standalone value of -2 means that the XML declaration was found,
10361 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010362 */
10363
10364int
10365xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010366 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010367
10368 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010369 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010370 SKIP(10);
10371 SKIP_BLANKS;
10372 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010373 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010374 return(standalone);
10375 }
10376 NEXT;
10377 SKIP_BLANKS;
10378 if (RAW == '\''){
10379 NEXT;
10380 if ((RAW == 'n') && (NXT(1) == 'o')) {
10381 standalone = 0;
10382 SKIP(2);
10383 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10384 (NXT(2) == 's')) {
10385 standalone = 1;
10386 SKIP(3);
10387 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010388 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010389 }
10390 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010391 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010392 } else
10393 NEXT;
10394 } else if (RAW == '"'){
10395 NEXT;
10396 if ((RAW == 'n') && (NXT(1) == 'o')) {
10397 standalone = 0;
10398 SKIP(2);
10399 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10400 (NXT(2) == 's')) {
10401 standalone = 1;
10402 SKIP(3);
10403 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010404 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010405 }
10406 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010407 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010408 } else
10409 NEXT;
10410 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010411 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010412 }
10413 }
10414 return(standalone);
10415}
10416
10417/**
10418 * xmlParseXMLDecl:
10419 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010420 *
Owen Taylor3473f882001-02-23 17:55:21 +000010421 * parse an XML declaration header
10422 *
10423 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10424 */
10425
10426void
10427xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10428 xmlChar *version;
10429
10430 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010431 * This value for standalone indicates that the document has an
10432 * XML declaration but it does not have a standalone attribute.
10433 * It will be overwritten later if a standalone attribute is found.
10434 */
10435 ctxt->input->standalone = -2;
10436
10437 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010438 * We know that '<?xml' is here.
10439 */
10440 SKIP(5);
10441
William M. Brack76e95df2003-10-18 16:20:14 +000010442 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010443 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10444 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010445 }
10446 SKIP_BLANKS;
10447
10448 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010449 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010450 */
10451 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010452 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010453 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010454 } else {
10455 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10456 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010457 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010458 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010459 if (ctxt->options & XML_PARSE_OLD10) {
10460 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10461 "Unsupported version '%s'\n",
10462 version);
10463 } else {
10464 if ((version[0] == '1') && ((version[1] == '.'))) {
10465 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10466 "Unsupported version '%s'\n",
10467 version, NULL);
10468 } else {
10469 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10470 "Unsupported version '%s'\n",
10471 version);
10472 }
10473 }
Daniel Veillard19840942001-11-29 16:11:38 +000010474 }
10475 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010476 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010477 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010478 }
Owen Taylor3473f882001-02-23 17:55:21 +000010479
10480 /*
10481 * We may have the encoding declaration
10482 */
William M. Brack76e95df2003-10-18 16:20:14 +000010483 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010484 if ((RAW == '?') && (NXT(1) == '>')) {
10485 SKIP(2);
10486 return;
10487 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010489 }
10490 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010491 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10492 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010493 /*
10494 * The XML REC instructs us to stop parsing right here
10495 */
10496 return;
10497 }
10498
10499 /*
10500 * We may have the standalone status.
10501 */
William M. Brack76e95df2003-10-18 16:20:14 +000010502 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010503 if ((RAW == '?') && (NXT(1) == '>')) {
10504 SKIP(2);
10505 return;
10506 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010507 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010508 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010509
10510 /*
10511 * We can grow the input buffer freely at that point
10512 */
10513 GROW;
10514
Owen Taylor3473f882001-02-23 17:55:21 +000010515 SKIP_BLANKS;
10516 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10517
10518 SKIP_BLANKS;
10519 if ((RAW == '?') && (NXT(1) == '>')) {
10520 SKIP(2);
10521 } else if (RAW == '>') {
10522 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010523 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010524 NEXT;
10525 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010526 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010527 MOVETO_ENDTAG(CUR_PTR);
10528 NEXT;
10529 }
10530}
10531
10532/**
10533 * xmlParseMisc:
10534 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010535 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010536 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010537 *
10538 * [27] Misc ::= Comment | PI | S
10539 */
10540
10541void
10542xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010543 while ((ctxt->instate != XML_PARSER_EOF) &&
10544 (((RAW == '<') && (NXT(1) == '?')) ||
10545 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10546 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010547 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010548 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010549 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010550 NEXT;
10551 } else
10552 xmlParseComment(ctxt);
10553 }
10554}
10555
10556/**
10557 * xmlParseDocument:
10558 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010559 *
Owen Taylor3473f882001-02-23 17:55:21 +000010560 * parse an XML document (and build a tree if using the standard SAX
10561 * interface).
10562 *
10563 * [1] document ::= prolog element Misc*
10564 *
10565 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10566 *
10567 * Returns 0, -1 in case of error. the parser context is augmented
10568 * as a result of the parsing.
10569 */
10570
10571int
10572xmlParseDocument(xmlParserCtxtPtr ctxt) {
10573 xmlChar start[4];
10574 xmlCharEncoding enc;
10575
10576 xmlInitParser();
10577
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010578 if ((ctxt == NULL) || (ctxt->input == NULL))
10579 return(-1);
10580
Owen Taylor3473f882001-02-23 17:55:21 +000010581 GROW;
10582
10583 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010584 * SAX: detecting the level.
10585 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010586 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010587
10588 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010589 * SAX: beginning of the document processing.
10590 */
10591 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10592 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010593 if (ctxt->instate == XML_PARSER_EOF)
10594 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010595
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010596 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010597 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010598 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010599 * Get the 4 first bytes and decode the charset
10600 * if enc != XML_CHAR_ENCODING_NONE
10601 * plug some encoding conversion routines.
10602 */
10603 start[0] = RAW;
10604 start[1] = NXT(1);
10605 start[2] = NXT(2);
10606 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010607 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010608 if (enc != XML_CHAR_ENCODING_NONE) {
10609 xmlSwitchEncoding(ctxt, enc);
10610 }
Owen Taylor3473f882001-02-23 17:55:21 +000010611 }
10612
10613
10614 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010615 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010616 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010617 }
10618
10619 /*
10620 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010621 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010622 * than just the first line, unless the amount of data is really
10623 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010624 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010625 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10626 GROW;
10627 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010628 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010629
10630 /*
10631 * Note that we will switch encoding on the fly.
10632 */
10633 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010634 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10635 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010636 /*
10637 * The XML REC instructs us to stop parsing right here
10638 */
10639 return(-1);
10640 }
10641 ctxt->standalone = ctxt->input->standalone;
10642 SKIP_BLANKS;
10643 } else {
10644 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10645 }
10646 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10647 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010648 if (ctxt->instate == XML_PARSER_EOF)
10649 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010650 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10651 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10652 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10653 }
Owen Taylor3473f882001-02-23 17:55:21 +000010654
10655 /*
10656 * The Misc part of the Prolog
10657 */
10658 GROW;
10659 xmlParseMisc(ctxt);
10660
10661 /*
10662 * Then possibly doc type declaration(s) and more Misc
10663 * (doctypedecl Misc*)?
10664 */
10665 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010666 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010667
10668 ctxt->inSubset = 1;
10669 xmlParseDocTypeDecl(ctxt);
10670 if (RAW == '[') {
10671 ctxt->instate = XML_PARSER_DTD;
10672 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010673 if (ctxt->instate == XML_PARSER_EOF)
10674 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010675 }
10676
10677 /*
10678 * Create and update the external subset.
10679 */
10680 ctxt->inSubset = 2;
10681 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10682 (!ctxt->disableSAX))
10683 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10684 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010685 if (ctxt->instate == XML_PARSER_EOF)
10686 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010687 ctxt->inSubset = 0;
10688
Daniel Veillardac4118d2008-01-11 05:27:32 +000010689 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010690
10691 ctxt->instate = XML_PARSER_PROLOG;
10692 xmlParseMisc(ctxt);
10693 }
10694
10695 /*
10696 * Time to start parsing the tree itself
10697 */
10698 GROW;
10699 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010700 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10701 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010702 } else {
10703 ctxt->instate = XML_PARSER_CONTENT;
10704 xmlParseElement(ctxt);
10705 ctxt->instate = XML_PARSER_EPILOG;
10706
10707
10708 /*
10709 * The Misc part at the end
10710 */
10711 xmlParseMisc(ctxt);
10712
Daniel Veillard561b7f82002-03-20 21:55:57 +000010713 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010714 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010715 }
10716 ctxt->instate = XML_PARSER_EOF;
10717 }
10718
10719 /*
10720 * SAX: end of the document processing.
10721 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010722 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010723 ctxt->sax->endDocument(ctxt->userData);
10724
Daniel Veillard5997aca2002-03-18 18:36:20 +000010725 /*
10726 * Remove locally kept entity definitions if the tree was not built
10727 */
10728 if ((ctxt->myDoc != NULL) &&
10729 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10730 xmlFreeDoc(ctxt->myDoc);
10731 ctxt->myDoc = NULL;
10732 }
10733
Daniel Veillardae0765b2008-07-31 19:54:59 +000010734 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10735 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10736 if (ctxt->valid)
10737 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10738 if (ctxt->nsWellFormed)
10739 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10740 if (ctxt->options & XML_PARSE_OLD10)
10741 ctxt->myDoc->properties |= XML_DOC_OLD10;
10742 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010743 if (! ctxt->wellFormed) {
10744 ctxt->valid = 0;
10745 return(-1);
10746 }
Owen Taylor3473f882001-02-23 17:55:21 +000010747 return(0);
10748}
10749
10750/**
10751 * xmlParseExtParsedEnt:
10752 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010753 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010754 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010755 * An external general parsed entity is well-formed if it matches the
10756 * production labeled extParsedEnt.
10757 *
10758 * [78] extParsedEnt ::= TextDecl? content
10759 *
10760 * Returns 0, -1 in case of error. the parser context is augmented
10761 * as a result of the parsing.
10762 */
10763
10764int
10765xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10766 xmlChar start[4];
10767 xmlCharEncoding enc;
10768
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010769 if ((ctxt == NULL) || (ctxt->input == NULL))
10770 return(-1);
10771
Owen Taylor3473f882001-02-23 17:55:21 +000010772 xmlDefaultSAXHandlerInit();
10773
Daniel Veillard309f81d2003-09-23 09:02:53 +000010774 xmlDetectSAX2(ctxt);
10775
Owen Taylor3473f882001-02-23 17:55:21 +000010776 GROW;
10777
10778 /*
10779 * SAX: beginning of the document processing.
10780 */
10781 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10782 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10783
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010784 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010785 * Get the 4 first bytes and decode the charset
10786 * if enc != XML_CHAR_ENCODING_NONE
10787 * plug some encoding conversion routines.
10788 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010789 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10790 start[0] = RAW;
10791 start[1] = NXT(1);
10792 start[2] = NXT(2);
10793 start[3] = NXT(3);
10794 enc = xmlDetectCharEncoding(start, 4);
10795 if (enc != XML_CHAR_ENCODING_NONE) {
10796 xmlSwitchEncoding(ctxt, enc);
10797 }
Owen Taylor3473f882001-02-23 17:55:21 +000010798 }
10799
10800
10801 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010802 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010803 }
10804
10805 /*
10806 * Check for the XMLDecl in the Prolog.
10807 */
10808 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010809 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010810
10811 /*
10812 * Note that we will switch encoding on the fly.
10813 */
10814 xmlParseXMLDecl(ctxt);
10815 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10816 /*
10817 * The XML REC instructs us to stop parsing right here
10818 */
10819 return(-1);
10820 }
10821 SKIP_BLANKS;
10822 } else {
10823 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10824 }
10825 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10826 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010827 if (ctxt->instate == XML_PARSER_EOF)
10828 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010829
10830 /*
10831 * Doing validity checking on chunk doesn't make sense
10832 */
10833 ctxt->instate = XML_PARSER_CONTENT;
10834 ctxt->validate = 0;
10835 ctxt->loadsubset = 0;
10836 ctxt->depth = 0;
10837
10838 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010839 if (ctxt->instate == XML_PARSER_EOF)
10840 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010841
Owen Taylor3473f882001-02-23 17:55:21 +000010842 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010843 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010844 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010845 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010846 }
10847
10848 /*
10849 * SAX: end of the document processing.
10850 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010851 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010852 ctxt->sax->endDocument(ctxt->userData);
10853
10854 if (! ctxt->wellFormed) return(-1);
10855 return(0);
10856}
10857
Daniel Veillard73b013f2003-09-30 12:36:01 +000010858#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010859/************************************************************************
10860 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010861 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010862 * *
10863 ************************************************************************/
10864
10865/**
10866 * xmlParseLookupSequence:
10867 * @ctxt: an XML parser context
10868 * @first: the first char to lookup
10869 * @next: the next char to lookup or zero
10870 * @third: the next char to lookup or zero
10871 *
10872 * Try to find if a sequence (first, next, third) or just (first next) or
10873 * (first) is available in the input stream.
10874 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10875 * to avoid rescanning sequences of bytes, it DOES change the state of the
10876 * parser, do not use liberally.
10877 *
10878 * Returns the index to the current parsing point if the full sequence
10879 * is available, -1 otherwise.
10880 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010881static int
Owen Taylor3473f882001-02-23 17:55:21 +000010882xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10883 xmlChar next, xmlChar third) {
10884 int base, len;
10885 xmlParserInputPtr in;
10886 const xmlChar *buf;
10887
10888 in = ctxt->input;
10889 if (in == NULL) return(-1);
10890 base = in->cur - in->base;
10891 if (base < 0) return(-1);
10892 if (ctxt->checkIndex > base)
10893 base = ctxt->checkIndex;
10894 if (in->buf == NULL) {
10895 buf = in->base;
10896 len = in->length;
10897 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010898 buf = xmlBufContent(in->buf->buffer);
10899 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010900 }
10901 /* take into account the sequence length */
10902 if (third) len -= 2;
10903 else if (next) len --;
10904 for (;base < len;base++) {
10905 if (buf[base] == first) {
10906 if (third != 0) {
10907 if ((buf[base + 1] != next) ||
10908 (buf[base + 2] != third)) continue;
10909 } else if (next != 0) {
10910 if (buf[base + 1] != next) continue;
10911 }
10912 ctxt->checkIndex = 0;
10913#ifdef DEBUG_PUSH
10914 if (next == 0)
10915 xmlGenericError(xmlGenericErrorContext,
10916 "PP: lookup '%c' found at %d\n",
10917 first, base);
10918 else if (third == 0)
10919 xmlGenericError(xmlGenericErrorContext,
10920 "PP: lookup '%c%c' found at %d\n",
10921 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010922 else
Owen Taylor3473f882001-02-23 17:55:21 +000010923 xmlGenericError(xmlGenericErrorContext,
10924 "PP: lookup '%c%c%c' found at %d\n",
10925 first, next, third, base);
10926#endif
10927 return(base - (in->cur - in->base));
10928 }
10929 }
10930 ctxt->checkIndex = base;
10931#ifdef DEBUG_PUSH
10932 if (next == 0)
10933 xmlGenericError(xmlGenericErrorContext,
10934 "PP: lookup '%c' failed\n", first);
10935 else if (third == 0)
10936 xmlGenericError(xmlGenericErrorContext,
10937 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010938 else
Owen Taylor3473f882001-02-23 17:55:21 +000010939 xmlGenericError(xmlGenericErrorContext,
10940 "PP: lookup '%c%c%c' failed\n", first, next, third);
10941#endif
10942 return(-1);
10943}
10944
10945/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010946 * xmlParseGetLasts:
10947 * @ctxt: an XML parser context
10948 * @lastlt: pointer to store the last '<' from the input
10949 * @lastgt: pointer to store the last '>' from the input
10950 *
10951 * Lookup the last < and > in the current chunk
10952 */
10953static void
10954xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10955 const xmlChar **lastgt) {
10956 const xmlChar *tmp;
10957
10958 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10959 xmlGenericError(xmlGenericErrorContext,
10960 "Internal error: xmlParseGetLasts\n");
10961 return;
10962 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010963 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010964 tmp = ctxt->input->end;
10965 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010966 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010967 if (tmp < ctxt->input->base) {
10968 *lastlt = NULL;
10969 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010970 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010971 *lastlt = tmp;
10972 tmp++;
10973 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10974 if (*tmp == '\'') {
10975 tmp++;
10976 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10977 if (tmp < ctxt->input->end) tmp++;
10978 } else if (*tmp == '"') {
10979 tmp++;
10980 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10981 if (tmp < ctxt->input->end) tmp++;
10982 } else
10983 tmp++;
10984 }
10985 if (tmp < ctxt->input->end)
10986 *lastgt = tmp;
10987 else {
10988 tmp = *lastlt;
10989 tmp--;
10990 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10991 if (tmp >= ctxt->input->base)
10992 *lastgt = tmp;
10993 else
10994 *lastgt = NULL;
10995 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010996 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010997 } else {
10998 *lastlt = NULL;
10999 *lastgt = NULL;
11000 }
11001}
11002/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011003 * xmlCheckCdataPush:
David Kilzer4f8606c2016-01-05 13:38:09 -080011004 * @cur: pointer to the block of characters
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011005 * @len: length of the block in bytes
David Kilzer4f8606c2016-01-05 13:38:09 -080011006 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011007 *
11008 * Check that the block of characters is okay as SCdata content [20]
11009 *
11010 * Returns the number of bytes to pass if okay, a negative index where an
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +020011011 * UTF-8 error occurred otherwise
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011012 */
11013static int
David Kilzer4f8606c2016-01-05 13:38:09 -080011014xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011015 int ix;
11016 unsigned char c;
11017 int codepoint;
11018
11019 if ((utf == NULL) || (len <= 0))
11020 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011021
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011022 for (ix = 0; ix < len;) { /* string is 0-terminated */
11023 c = utf[ix];
11024 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11025 if (c >= 0x20)
11026 ix++;
11027 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11028 ix++;
11029 else
11030 return(-ix);
11031 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011032 if (ix + 2 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011033 if ((utf[ix+1] & 0xc0 ) != 0x80)
11034 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011035 codepoint = (utf[ix] & 0x1f) << 6;
11036 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011037 if (!xmlIsCharQ(codepoint))
11038 return(-ix);
11039 ix += 2;
11040 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011041 if (ix + 3 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011042 if (((utf[ix+1] & 0xc0) != 0x80) ||
11043 ((utf[ix+2] & 0xc0) != 0x80))
11044 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011045 codepoint = (utf[ix] & 0xf) << 12;
11046 codepoint |= (utf[ix+1] & 0x3f) << 6;
11047 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011048 if (!xmlIsCharQ(codepoint))
11049 return(-ix);
11050 ix += 3;
11051 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011052 if (ix + 4 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011053 if (((utf[ix+1] & 0xc0) != 0x80) ||
11054 ((utf[ix+2] & 0xc0) != 0x80) ||
11055 ((utf[ix+3] & 0xc0) != 0x80))
11056 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011057 codepoint = (utf[ix] & 0x7) << 18;
11058 codepoint |= (utf[ix+1] & 0x3f) << 12;
11059 codepoint |= (utf[ix+2] & 0x3f) << 6;
11060 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011061 if (!xmlIsCharQ(codepoint))
11062 return(-ix);
11063 ix += 4;
11064 } else /* unknown encoding */
11065 return(-ix);
11066 }
11067 return(ix);
11068}
11069
11070/**
Owen Taylor3473f882001-02-23 17:55:21 +000011071 * xmlParseTryOrFinish:
11072 * @ctxt: an XML parser context
11073 * @terminate: last chunk indicator
11074 *
11075 * Try to progress on parsing
11076 *
11077 * Returns zero if no parsing was possible
11078 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011079static int
Owen Taylor3473f882001-02-23 17:55:21 +000011080xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11081 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011082 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011083 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011084 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011085
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011086 if (ctxt->input == NULL)
11087 return(0);
11088
Owen Taylor3473f882001-02-23 17:55:21 +000011089#ifdef DEBUG_PUSH
11090 switch (ctxt->instate) {
11091 case XML_PARSER_EOF:
11092 xmlGenericError(xmlGenericErrorContext,
11093 "PP: try EOF\n"); break;
11094 case XML_PARSER_START:
11095 xmlGenericError(xmlGenericErrorContext,
11096 "PP: try START\n"); break;
11097 case XML_PARSER_MISC:
11098 xmlGenericError(xmlGenericErrorContext,
11099 "PP: try MISC\n");break;
11100 case XML_PARSER_COMMENT:
11101 xmlGenericError(xmlGenericErrorContext,
11102 "PP: try COMMENT\n");break;
11103 case XML_PARSER_PROLOG:
11104 xmlGenericError(xmlGenericErrorContext,
11105 "PP: try PROLOG\n");break;
11106 case XML_PARSER_START_TAG:
11107 xmlGenericError(xmlGenericErrorContext,
11108 "PP: try START_TAG\n");break;
11109 case XML_PARSER_CONTENT:
11110 xmlGenericError(xmlGenericErrorContext,
11111 "PP: try CONTENT\n");break;
11112 case XML_PARSER_CDATA_SECTION:
11113 xmlGenericError(xmlGenericErrorContext,
11114 "PP: try CDATA_SECTION\n");break;
11115 case XML_PARSER_END_TAG:
11116 xmlGenericError(xmlGenericErrorContext,
11117 "PP: try END_TAG\n");break;
11118 case XML_PARSER_ENTITY_DECL:
11119 xmlGenericError(xmlGenericErrorContext,
11120 "PP: try ENTITY_DECL\n");break;
11121 case XML_PARSER_ENTITY_VALUE:
11122 xmlGenericError(xmlGenericErrorContext,
11123 "PP: try ENTITY_VALUE\n");break;
11124 case XML_PARSER_ATTRIBUTE_VALUE:
11125 xmlGenericError(xmlGenericErrorContext,
11126 "PP: try ATTRIBUTE_VALUE\n");break;
11127 case XML_PARSER_DTD:
11128 xmlGenericError(xmlGenericErrorContext,
11129 "PP: try DTD\n");break;
11130 case XML_PARSER_EPILOG:
11131 xmlGenericError(xmlGenericErrorContext,
11132 "PP: try EPILOG\n");break;
11133 case XML_PARSER_PI:
11134 xmlGenericError(xmlGenericErrorContext,
11135 "PP: try PI\n");break;
11136 case XML_PARSER_IGNORE:
11137 xmlGenericError(xmlGenericErrorContext,
11138 "PP: try IGNORE\n");break;
11139 }
11140#endif
11141
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011142 if ((ctxt->input != NULL) &&
11143 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011144 xmlSHRINK(ctxt);
11145 ctxt->checkIndex = 0;
11146 }
11147 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011148
Daniel Veillarde50ba812013-04-11 15:54:51 +080011149 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011150 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011151 return(0);
11152
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011153 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011154 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011155 avail = ctxt->input->length -
11156 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011157 else {
11158 /*
11159 * If we are operating on converted input, try to flush
11160 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011161 * buffer. But do not do this in document start where
11162 * encoding="..." may not have been read and we work on a
11163 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011164 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011165 if ((ctxt->instate != XML_PARSER_START) &&
11166 (ctxt->input->buf->raw != NULL) &&
11167 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011168 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11169 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011170 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011171
11172 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011173 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11174 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011175 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011176 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011177 (ctxt->input->cur - ctxt->input->base);
11178 }
Owen Taylor3473f882001-02-23 17:55:21 +000011179 if (avail < 1)
11180 goto done;
11181 switch (ctxt->instate) {
11182 case XML_PARSER_EOF:
11183 /*
11184 * Document parsing is done !
11185 */
11186 goto done;
11187 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011188 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11189 xmlChar start[4];
11190 xmlCharEncoding enc;
11191
11192 /*
11193 * Very first chars read from the document flow.
11194 */
11195 if (avail < 4)
11196 goto done;
11197
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011198 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011199 * Get the 4 first bytes and decode the charset
11200 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011201 * plug some encoding conversion routines,
11202 * else xmlSwitchEncoding will set to (default)
11203 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011204 */
11205 start[0] = RAW;
11206 start[1] = NXT(1);
11207 start[2] = NXT(2);
11208 start[3] = NXT(3);
11209 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011210 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011211 break;
11212 }
Owen Taylor3473f882001-02-23 17:55:21 +000011213
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011214 if (avail < 2)
11215 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011216 cur = ctxt->input->cur[0];
11217 next = ctxt->input->cur[1];
11218 if (cur == 0) {
11219 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11220 ctxt->sax->setDocumentLocator(ctxt->userData,
11221 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011222 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011223 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011224#ifdef DEBUG_PUSH
11225 xmlGenericError(xmlGenericErrorContext,
11226 "PP: entering EOF\n");
11227#endif
11228 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11229 ctxt->sax->endDocument(ctxt->userData);
11230 goto done;
11231 }
11232 if ((cur == '<') && (next == '?')) {
11233 /* PI or XML decl */
11234 if (avail < 5) return(ret);
11235 if ((!terminate) &&
11236 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11237 return(ret);
11238 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11239 ctxt->sax->setDocumentLocator(ctxt->userData,
11240 &xmlDefaultSAXLocator);
11241 if ((ctxt->input->cur[2] == 'x') &&
11242 (ctxt->input->cur[3] == 'm') &&
11243 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011244 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011245 ret += 5;
11246#ifdef DEBUG_PUSH
11247 xmlGenericError(xmlGenericErrorContext,
11248 "PP: Parsing XML Decl\n");
11249#endif
11250 xmlParseXMLDecl(ctxt);
11251 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11252 /*
11253 * The XML REC instructs us to stop parsing right
11254 * here
11255 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011256 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011257 return(0);
11258 }
11259 ctxt->standalone = ctxt->input->standalone;
11260 if ((ctxt->encoding == NULL) &&
11261 (ctxt->input->encoding != NULL))
11262 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11263 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11264 (!ctxt->disableSAX))
11265 ctxt->sax->startDocument(ctxt->userData);
11266 ctxt->instate = XML_PARSER_MISC;
11267#ifdef DEBUG_PUSH
11268 xmlGenericError(xmlGenericErrorContext,
11269 "PP: entering MISC\n");
11270#endif
11271 } else {
11272 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11273 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11274 (!ctxt->disableSAX))
11275 ctxt->sax->startDocument(ctxt->userData);
11276 ctxt->instate = XML_PARSER_MISC;
11277#ifdef DEBUG_PUSH
11278 xmlGenericError(xmlGenericErrorContext,
11279 "PP: entering MISC\n");
11280#endif
11281 }
11282 } else {
11283 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11284 ctxt->sax->setDocumentLocator(ctxt->userData,
11285 &xmlDefaultSAXLocator);
11286 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011287 if (ctxt->version == NULL) {
11288 xmlErrMemory(ctxt, NULL);
11289 break;
11290 }
Owen Taylor3473f882001-02-23 17:55:21 +000011291 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11292 (!ctxt->disableSAX))
11293 ctxt->sax->startDocument(ctxt->userData);
11294 ctxt->instate = XML_PARSER_MISC;
11295#ifdef DEBUG_PUSH
11296 xmlGenericError(xmlGenericErrorContext,
11297 "PP: entering MISC\n");
11298#endif
11299 }
11300 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011301 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011302 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011303 const xmlChar *prefix = NULL;
11304 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011305 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011306
11307 if ((avail < 2) && (ctxt->inputNr == 1))
11308 goto done;
11309 cur = ctxt->input->cur[0];
11310 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011311 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011312 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011313 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11314 ctxt->sax->endDocument(ctxt->userData);
11315 goto done;
11316 }
11317 if (!terminate) {
11318 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011319 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011320 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011321 goto done;
11322 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11323 goto done;
11324 }
11325 }
11326 if (ctxt->spaceNr == 0)
11327 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011328 else if (*ctxt->space == -2)
11329 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011330 else
11331 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011332#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011333 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011334#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011335 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011336#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011337 else
11338 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011339#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011340 if (ctxt->instate == XML_PARSER_EOF)
11341 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011342 if (name == NULL) {
11343 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011344 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011345 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11346 ctxt->sax->endDocument(ctxt->userData);
11347 goto done;
11348 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011349#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011350 /*
11351 * [ VC: Root Element Type ]
11352 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011353 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011354 */
11355 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11356 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11357 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011358#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011359
11360 /*
11361 * Check for an Empty Element.
11362 */
11363 if ((RAW == '/') && (NXT(1) == '>')) {
11364 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011365
11366 if (ctxt->sax2) {
11367 if ((ctxt->sax != NULL) &&
11368 (ctxt->sax->endElementNs != NULL) &&
11369 (!ctxt->disableSAX))
11370 ctxt->sax->endElementNs(ctxt->userData, name,
11371 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011372 if (ctxt->nsNr - nsNr > 0)
11373 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011374#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011375 } else {
11376 if ((ctxt->sax != NULL) &&
11377 (ctxt->sax->endElement != NULL) &&
11378 (!ctxt->disableSAX))
11379 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011380#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011381 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011382 if (ctxt->instate == XML_PARSER_EOF)
11383 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011384 spacePop(ctxt);
11385 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011386 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011387 } else {
11388 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011389 }
Daniel Veillard65686452012-07-19 18:25:01 +080011390 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011391 break;
11392 }
11393 if (RAW == '>') {
11394 NEXT;
11395 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011396 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011397 "Couldn't find end of Start Tag %s\n",
11398 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011399 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011400 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011401 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011402 if (ctxt->sax2)
11403 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011404#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011405 else
11406 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011407#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011408
Daniel Veillarda880b122003-04-21 21:36:41 +000011409 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011410 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011411 break;
11412 }
11413 case XML_PARSER_CONTENT: {
11414 const xmlChar *test;
11415 unsigned int cons;
11416 if ((avail < 2) && (ctxt->inputNr == 1))
11417 goto done;
11418 cur = ctxt->input->cur[0];
11419 next = ctxt->input->cur[1];
11420
11421 test = CUR_PTR;
11422 cons = ctxt->input->consumed;
11423 if ((cur == '<') && (next == '/')) {
11424 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011425 break;
11426 } else if ((cur == '<') && (next == '?')) {
11427 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011428 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11429 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011430 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011431 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011432 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011433 ctxt->instate = XML_PARSER_CONTENT;
11434 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011435 } else if ((cur == '<') && (next != '!')) {
11436 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011437 break;
11438 } else if ((cur == '<') && (next == '!') &&
11439 (ctxt->input->cur[2] == '-') &&
11440 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011441 int term;
11442
11443 if (avail < 4)
11444 goto done;
11445 ctxt->input->cur += 4;
11446 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11447 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011448 if ((!terminate) && (term < 0)) {
11449 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011450 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011451 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011452 xmlParseComment(ctxt);
11453 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011454 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011455 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11456 (ctxt->input->cur[2] == '[') &&
11457 (ctxt->input->cur[3] == 'C') &&
11458 (ctxt->input->cur[4] == 'D') &&
11459 (ctxt->input->cur[5] == 'A') &&
11460 (ctxt->input->cur[6] == 'T') &&
11461 (ctxt->input->cur[7] == 'A') &&
11462 (ctxt->input->cur[8] == '[')) {
11463 SKIP(9);
11464 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011465 break;
11466 } else if ((cur == '<') && (next == '!') &&
11467 (avail < 9)) {
11468 goto done;
11469 } else if (cur == '&') {
11470 if ((!terminate) &&
11471 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11472 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011473 xmlParseReference(ctxt);
11474 } else {
11475 /* TODO Avoid the extra copy, handle directly !!! */
11476 /*
11477 * Goal of the following test is:
11478 * - minimize calls to the SAX 'character' callback
11479 * when they are mergeable
11480 * - handle a problem for isBlank when we only parse
11481 * a sequence of blank chars and the next one is
11482 * not available to check against '<' presence.
11483 * - tries to homogenize the differences in SAX
11484 * callbacks between the push and pull versions
11485 * of the parser.
11486 */
11487 if ((ctxt->inputNr == 1) &&
11488 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11489 if (!terminate) {
11490 if (ctxt->progressive) {
11491 if ((lastlt == NULL) ||
11492 (ctxt->input->cur > lastlt))
11493 goto done;
11494 } else if (xmlParseLookupSequence(ctxt,
11495 '<', 0, 0) < 0) {
11496 goto done;
11497 }
11498 }
11499 }
11500 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011501 xmlParseCharData(ctxt, 0);
11502 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011503 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011504 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11505 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011506 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011507 break;
11508 }
11509 break;
11510 }
11511 case XML_PARSER_END_TAG:
11512 if (avail < 2)
11513 goto done;
11514 if (!terminate) {
11515 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011516 /* > can be found unescaped in attribute values */
11517 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011518 goto done;
11519 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11520 goto done;
11521 }
11522 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011523 if (ctxt->sax2) {
11524 xmlParseEndTag2(ctxt,
11525 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11526 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011527 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011528 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011529 }
11530#ifdef LIBXML_SAX1_ENABLED
11531 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011532 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011533#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011534 if (ctxt->instate == XML_PARSER_EOF) {
11535 /* Nothing */
11536 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011537 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011538 } else {
11539 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011540 }
11541 break;
11542 case XML_PARSER_CDATA_SECTION: {
11543 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011544 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011545 * cdataBlock merge back contiguous callbacks.
11546 */
11547 int base;
11548
11549 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11550 if (base < 0) {
11551 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011552 int tmp;
11553
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011554 tmp = xmlCheckCdataPush(ctxt->input->cur,
David Kilzer4f8606c2016-01-05 13:38:09 -080011555 XML_PARSER_BIG_BUFFER_SIZE, 0);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011556 if (tmp < 0) {
11557 tmp = -tmp;
11558 ctxt->input->cur += tmp;
11559 goto encoding_error;
11560 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011561 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11562 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011563 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011564 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011565 else if (ctxt->sax->characters != NULL)
11566 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011567 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011568 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011569 if (ctxt->instate == XML_PARSER_EOF)
11570 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011571 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011572 ctxt->checkIndex = 0;
11573 }
11574 goto done;
11575 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011576 int tmp;
11577
David Kilzer4f8606c2016-01-05 13:38:09 -080011578 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011579 if ((tmp < 0) || (tmp != base)) {
11580 tmp = -tmp;
11581 ctxt->input->cur += tmp;
11582 goto encoding_error;
11583 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011584 if ((ctxt->sax != NULL) && (base == 0) &&
11585 (ctxt->sax->cdataBlock != NULL) &&
11586 (!ctxt->disableSAX)) {
11587 /*
11588 * Special case to provide identical behaviour
11589 * between pull and push parsers on empty CDATA
11590 * sections
11591 */
11592 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11593 (!strncmp((const char *)&ctxt->input->cur[-9],
11594 "<![CDATA[", 9)))
11595 ctxt->sax->cdataBlock(ctxt->userData,
11596 BAD_CAST "", 0);
11597 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011598 (!ctxt->disableSAX)) {
11599 if (ctxt->sax->cdataBlock != NULL)
11600 ctxt->sax->cdataBlock(ctxt->userData,
11601 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011602 else if (ctxt->sax->characters != NULL)
11603 ctxt->sax->characters(ctxt->userData,
11604 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011605 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011606 if (ctxt->instate == XML_PARSER_EOF)
11607 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011608 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011609 ctxt->checkIndex = 0;
11610 ctxt->instate = XML_PARSER_CONTENT;
11611#ifdef DEBUG_PUSH
11612 xmlGenericError(xmlGenericErrorContext,
11613 "PP: entering CONTENT\n");
11614#endif
11615 }
11616 break;
11617 }
Owen Taylor3473f882001-02-23 17:55:21 +000011618 case XML_PARSER_MISC:
11619 SKIP_BLANKS;
11620 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011621 avail = ctxt->input->length -
11622 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011623 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011624 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011625 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011626 if (avail < 2)
11627 goto done;
11628 cur = ctxt->input->cur[0];
11629 next = ctxt->input->cur[1];
11630 if ((cur == '<') && (next == '?')) {
11631 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011632 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11633 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011634 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011635 }
Owen Taylor3473f882001-02-23 17:55:21 +000011636#ifdef DEBUG_PUSH
11637 xmlGenericError(xmlGenericErrorContext,
11638 "PP: Parsing PI\n");
11639#endif
11640 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011641 if (ctxt->instate == XML_PARSER_EOF)
11642 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011643 ctxt->instate = XML_PARSER_MISC;
11644 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011645 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011646 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011647 (ctxt->input->cur[2] == '-') &&
11648 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011649 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011650 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11651 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011652 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011653 }
Owen Taylor3473f882001-02-23 17:55:21 +000011654#ifdef DEBUG_PUSH
11655 xmlGenericError(xmlGenericErrorContext,
11656 "PP: Parsing Comment\n");
11657#endif
11658 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011659 if (ctxt->instate == XML_PARSER_EOF)
11660 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011661 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011662 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011663 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011664 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011665 (ctxt->input->cur[2] == 'D') &&
11666 (ctxt->input->cur[3] == 'O') &&
11667 (ctxt->input->cur[4] == 'C') &&
11668 (ctxt->input->cur[5] == 'T') &&
11669 (ctxt->input->cur[6] == 'Y') &&
11670 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011671 (ctxt->input->cur[8] == 'E')) {
11672 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011673 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11674 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011675 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011676 }
Owen Taylor3473f882001-02-23 17:55:21 +000011677#ifdef DEBUG_PUSH
11678 xmlGenericError(xmlGenericErrorContext,
11679 "PP: Parsing internal subset\n");
11680#endif
11681 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011682 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011683 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011684 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011685 if (ctxt->instate == XML_PARSER_EOF)
11686 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011687 if (RAW == '[') {
11688 ctxt->instate = XML_PARSER_DTD;
11689#ifdef DEBUG_PUSH
11690 xmlGenericError(xmlGenericErrorContext,
11691 "PP: entering DTD\n");
11692#endif
11693 } else {
11694 /*
11695 * Create and update the external subset.
11696 */
11697 ctxt->inSubset = 2;
11698 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11699 (ctxt->sax->externalSubset != NULL))
11700 ctxt->sax->externalSubset(ctxt->userData,
11701 ctxt->intSubName, ctxt->extSubSystem,
11702 ctxt->extSubURI);
11703 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011704 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011705 ctxt->instate = XML_PARSER_PROLOG;
11706#ifdef DEBUG_PUSH
11707 xmlGenericError(xmlGenericErrorContext,
11708 "PP: entering PROLOG\n");
11709#endif
11710 }
11711 } else if ((cur == '<') && (next == '!') &&
11712 (avail < 9)) {
11713 goto done;
11714 } else {
11715 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011716 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011717 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011718#ifdef DEBUG_PUSH
11719 xmlGenericError(xmlGenericErrorContext,
11720 "PP: entering START_TAG\n");
11721#endif
11722 }
11723 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011724 case XML_PARSER_PROLOG:
11725 SKIP_BLANKS;
11726 if (ctxt->input->buf == NULL)
11727 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11728 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011729 avail = xmlBufUse(ctxt->input->buf->buffer) -
11730 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011731 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011732 goto done;
11733 cur = ctxt->input->cur[0];
11734 next = ctxt->input->cur[1];
11735 if ((cur == '<') && (next == '?')) {
11736 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011737 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11738 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011739 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011740 }
Owen Taylor3473f882001-02-23 17:55:21 +000011741#ifdef DEBUG_PUSH
11742 xmlGenericError(xmlGenericErrorContext,
11743 "PP: Parsing PI\n");
11744#endif
11745 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011746 if (ctxt->instate == XML_PARSER_EOF)
11747 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011748 ctxt->instate = XML_PARSER_PROLOG;
11749 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011750 } else if ((cur == '<') && (next == '!') &&
11751 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11752 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011753 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11754 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011755 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011756 }
Owen Taylor3473f882001-02-23 17:55:21 +000011757#ifdef DEBUG_PUSH
11758 xmlGenericError(xmlGenericErrorContext,
11759 "PP: Parsing Comment\n");
11760#endif
11761 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011762 if (ctxt->instate == XML_PARSER_EOF)
11763 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011764 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011765 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011766 } else if ((cur == '<') && (next == '!') &&
11767 (avail < 4)) {
11768 goto done;
11769 } else {
11770 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011771 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011772 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011773 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011774#ifdef DEBUG_PUSH
11775 xmlGenericError(xmlGenericErrorContext,
11776 "PP: entering START_TAG\n");
11777#endif
11778 }
11779 break;
11780 case XML_PARSER_EPILOG:
11781 SKIP_BLANKS;
11782 if (ctxt->input->buf == NULL)
11783 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11784 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011785 avail = xmlBufUse(ctxt->input->buf->buffer) -
11786 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011787 if (avail < 2)
11788 goto done;
11789 cur = ctxt->input->cur[0];
11790 next = ctxt->input->cur[1];
11791 if ((cur == '<') && (next == '?')) {
11792 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011793 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11794 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011795 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011796 }
Owen Taylor3473f882001-02-23 17:55:21 +000011797#ifdef DEBUG_PUSH
11798 xmlGenericError(xmlGenericErrorContext,
11799 "PP: Parsing PI\n");
11800#endif
11801 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011802 if (ctxt->instate == XML_PARSER_EOF)
11803 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011804 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011805 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011806 } else if ((cur == '<') && (next == '!') &&
11807 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11808 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011809 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11810 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011811 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011812 }
Owen Taylor3473f882001-02-23 17:55:21 +000011813#ifdef DEBUG_PUSH
11814 xmlGenericError(xmlGenericErrorContext,
11815 "PP: Parsing Comment\n");
11816#endif
11817 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011818 if (ctxt->instate == XML_PARSER_EOF)
11819 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011820 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011821 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011822 } else if ((cur == '<') && (next == '!') &&
11823 (avail < 4)) {
11824 goto done;
11825 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011826 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011827 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011828#ifdef DEBUG_PUSH
11829 xmlGenericError(xmlGenericErrorContext,
11830 "PP: entering EOF\n");
11831#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011832 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011833 ctxt->sax->endDocument(ctxt->userData);
11834 goto done;
11835 }
11836 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011837 case XML_PARSER_DTD: {
11838 /*
11839 * Sorry but progressive parsing of the internal subset
11840 * is not expected to be supported. We first check that
11841 * the full content of the internal subset is available and
11842 * the parsing is launched only at that point.
11843 * Internal subset ends up with "']' S? '>'" in an unescaped
11844 * section and not in a ']]>' sequence which are conditional
11845 * sections (whoever argued to keep that crap in XML deserve
11846 * a place in hell !).
11847 */
11848 int base, i;
11849 xmlChar *buf;
11850 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011851 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011852
11853 base = ctxt->input->cur - ctxt->input->base;
11854 if (base < 0) return(0);
11855 if (ctxt->checkIndex > base)
11856 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011857 buf = xmlBufContent(ctxt->input->buf->buffer);
11858 use = xmlBufUse(ctxt->input->buf->buffer);
11859 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011860 if (quote != 0) {
11861 if (buf[base] == quote)
11862 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011863 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011864 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011865 if ((quote == 0) && (buf[base] == '<')) {
11866 int found = 0;
11867 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011868 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011869 (buf[base + 1] == '!') &&
11870 (buf[base + 2] == '-') &&
11871 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011872 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011873 if ((buf[base] == '-') &&
11874 (buf[base + 1] == '-') &&
11875 (buf[base + 2] == '>')) {
11876 found = 1;
11877 base += 2;
11878 break;
11879 }
11880 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011881 if (!found) {
11882#if 0
11883 fprintf(stderr, "unfinished comment\n");
11884#endif
11885 break; /* for */
11886 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011887 continue;
11888 }
11889 }
Owen Taylor3473f882001-02-23 17:55:21 +000011890 if (buf[base] == '"') {
11891 quote = '"';
11892 continue;
11893 }
11894 if (buf[base] == '\'') {
11895 quote = '\'';
11896 continue;
11897 }
11898 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011899#if 0
11900 fprintf(stderr, "%c%c%c%c: ", buf[base],
11901 buf[base + 1], buf[base + 2], buf[base + 3]);
11902#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011903 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011904 break;
11905 if (buf[base + 1] == ']') {
11906 /* conditional crap, skip both ']' ! */
11907 base++;
11908 continue;
11909 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011910 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011911 if (buf[base + i] == '>') {
11912#if 0
11913 fprintf(stderr, "found\n");
11914#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011915 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011916 }
11917 if (!IS_BLANK_CH(buf[base + i])) {
11918#if 0
11919 fprintf(stderr, "not found\n");
11920#endif
11921 goto not_end_of_int_subset;
11922 }
Owen Taylor3473f882001-02-23 17:55:21 +000011923 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011924#if 0
11925 fprintf(stderr, "end of stream\n");
11926#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011927 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011928
Owen Taylor3473f882001-02-23 17:55:21 +000011929 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011930not_end_of_int_subset:
11931 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011932 }
11933 /*
11934 * We didn't find the end of the Internal subset
11935 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011936 if (quote == 0)
11937 ctxt->checkIndex = base;
11938 else
11939 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011940#ifdef DEBUG_PUSH
11941 if (next == 0)
11942 xmlGenericError(xmlGenericErrorContext,
11943 "PP: lookup of int subset end filed\n");
11944#endif
11945 goto done;
11946
11947found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011948 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011949 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011950 if (ctxt->instate == XML_PARSER_EOF)
11951 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011952 ctxt->inSubset = 2;
11953 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11954 (ctxt->sax->externalSubset != NULL))
11955 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11956 ctxt->extSubSystem, ctxt->extSubURI);
11957 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011958 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011959 if (ctxt->instate == XML_PARSER_EOF)
11960 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011961 ctxt->instate = XML_PARSER_PROLOG;
11962 ctxt->checkIndex = 0;
11963#ifdef DEBUG_PUSH
11964 xmlGenericError(xmlGenericErrorContext,
11965 "PP: entering PROLOG\n");
11966#endif
11967 break;
11968 }
11969 case XML_PARSER_COMMENT:
11970 xmlGenericError(xmlGenericErrorContext,
11971 "PP: internal error, state == COMMENT\n");
11972 ctxt->instate = XML_PARSER_CONTENT;
11973#ifdef DEBUG_PUSH
11974 xmlGenericError(xmlGenericErrorContext,
11975 "PP: entering CONTENT\n");
11976#endif
11977 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011978 case XML_PARSER_IGNORE:
11979 xmlGenericError(xmlGenericErrorContext,
11980 "PP: internal error, state == IGNORE");
11981 ctxt->instate = XML_PARSER_DTD;
11982#ifdef DEBUG_PUSH
11983 xmlGenericError(xmlGenericErrorContext,
11984 "PP: entering DTD\n");
11985#endif
11986 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011987 case XML_PARSER_PI:
11988 xmlGenericError(xmlGenericErrorContext,
11989 "PP: internal error, state == PI\n");
11990 ctxt->instate = XML_PARSER_CONTENT;
11991#ifdef DEBUG_PUSH
11992 xmlGenericError(xmlGenericErrorContext,
11993 "PP: entering CONTENT\n");
11994#endif
11995 break;
11996 case XML_PARSER_ENTITY_DECL:
11997 xmlGenericError(xmlGenericErrorContext,
11998 "PP: internal error, state == ENTITY_DECL\n");
11999 ctxt->instate = XML_PARSER_DTD;
12000#ifdef DEBUG_PUSH
12001 xmlGenericError(xmlGenericErrorContext,
12002 "PP: entering DTD\n");
12003#endif
12004 break;
12005 case XML_PARSER_ENTITY_VALUE:
12006 xmlGenericError(xmlGenericErrorContext,
12007 "PP: internal error, state == ENTITY_VALUE\n");
12008 ctxt->instate = XML_PARSER_CONTENT;
12009#ifdef DEBUG_PUSH
12010 xmlGenericError(xmlGenericErrorContext,
12011 "PP: entering DTD\n");
12012#endif
12013 break;
12014 case XML_PARSER_ATTRIBUTE_VALUE:
12015 xmlGenericError(xmlGenericErrorContext,
12016 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12017 ctxt->instate = XML_PARSER_START_TAG;
12018#ifdef DEBUG_PUSH
12019 xmlGenericError(xmlGenericErrorContext,
12020 "PP: entering START_TAG\n");
12021#endif
12022 break;
12023 case XML_PARSER_SYSTEM_LITERAL:
12024 xmlGenericError(xmlGenericErrorContext,
12025 "PP: internal error, state == SYSTEM_LITERAL\n");
12026 ctxt->instate = XML_PARSER_START_TAG;
12027#ifdef DEBUG_PUSH
12028 xmlGenericError(xmlGenericErrorContext,
12029 "PP: entering START_TAG\n");
12030#endif
12031 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012032 case XML_PARSER_PUBLIC_LITERAL:
12033 xmlGenericError(xmlGenericErrorContext,
12034 "PP: internal error, state == PUBLIC_LITERAL\n");
12035 ctxt->instate = XML_PARSER_START_TAG;
12036#ifdef DEBUG_PUSH
12037 xmlGenericError(xmlGenericErrorContext,
12038 "PP: entering START_TAG\n");
12039#endif
12040 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012041 }
12042 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012043done:
Owen Taylor3473f882001-02-23 17:55:21 +000012044#ifdef DEBUG_PUSH
12045 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12046#endif
12047 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012048encoding_error:
12049 {
12050 char buffer[150];
12051
12052 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12053 ctxt->input->cur[0], ctxt->input->cur[1],
12054 ctxt->input->cur[2], ctxt->input->cur[3]);
12055 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12056 "Input is not proper UTF-8, indicate encoding !\n%s",
12057 BAD_CAST buffer, NULL);
12058 }
12059 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012060}
12061
12062/**
Daniel Veillard65686452012-07-19 18:25:01 +080012063 * xmlParseCheckTransition:
12064 * @ctxt: an XML parser context
12065 * @chunk: a char array
12066 * @size: the size in byte of the chunk
12067 *
12068 * Check depending on the current parser state if the chunk given must be
12069 * processed immediately or one need more data to advance on parsing.
12070 *
12071 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12072 */
12073static int
12074xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12075 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12076 return(-1);
12077 if (ctxt->instate == XML_PARSER_START_TAG) {
12078 if (memchr(chunk, '>', size) != NULL)
12079 return(1);
12080 return(0);
12081 }
12082 if (ctxt->progressive == XML_PARSER_COMMENT) {
12083 if (memchr(chunk, '>', size) != NULL)
12084 return(1);
12085 return(0);
12086 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012087 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12088 if (memchr(chunk, '>', size) != NULL)
12089 return(1);
12090 return(0);
12091 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012092 if (ctxt->progressive == XML_PARSER_PI) {
12093 if (memchr(chunk, '>', size) != NULL)
12094 return(1);
12095 return(0);
12096 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012097 if (ctxt->instate == XML_PARSER_END_TAG) {
12098 if (memchr(chunk, '>', size) != NULL)
12099 return(1);
12100 return(0);
12101 }
12102 if ((ctxt->progressive == XML_PARSER_DTD) ||
12103 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012104 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012105 return(1);
12106 return(0);
12107 }
Daniel Veillard65686452012-07-19 18:25:01 +080012108 return(1);
12109}
12110
12111/**
Owen Taylor3473f882001-02-23 17:55:21 +000012112 * xmlParseChunk:
12113 * @ctxt: an XML parser context
12114 * @chunk: an char array
12115 * @size: the size in byte of the chunk
12116 * @terminate: last chunk indicator
12117 *
12118 * Parse a Chunk of memory
12119 *
12120 * Returns zero if no error, the xmlParserErrors otherwise.
12121 */
12122int
12123xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12124 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012125 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012126 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012127 size_t old_avail = 0;
12128 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012129
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012130 if (ctxt == NULL)
12131 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012132 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012133 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012134 if (ctxt->instate == XML_PARSER_EOF)
12135 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012136 if (ctxt->instate == XML_PARSER_START)
12137 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012138 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12139 (chunk[size - 1] == '\r')) {
12140 end_in_lf = 1;
12141 size--;
12142 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012143
12144xmldecl_done:
12145
Owen Taylor3473f882001-02-23 17:55:21 +000012146 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12147 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012148 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12149 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012150 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012151
Daniel Veillard65686452012-07-19 18:25:01 +080012152 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012153 /*
12154 * Specific handling if we autodetected an encoding, we should not
12155 * push more than the first line ... which depend on the encoding
12156 * And only push the rest once the final encoding was detected
12157 */
12158 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12159 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012160 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012161
12162 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12163 BAD_CAST "UTF-16")) ||
12164 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12165 BAD_CAST "UTF16")))
12166 len = 90;
12167 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12168 BAD_CAST "UCS-4")) ||
12169 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12170 BAD_CAST "UCS4")))
12171 len = 180;
12172
12173 if (ctxt->input->buf->rawconsumed < len)
12174 len -= ctxt->input->buf->rawconsumed;
12175
Raul Hudeaba9716a2010-03-15 10:13:29 +010012176 /*
12177 * Change size for reading the initial declaration only
12178 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12179 * will blindly copy extra bytes from memory.
12180 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012181 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012182 remain = size - len;
12183 size = len;
12184 } else {
12185 remain = 0;
12186 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012187 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012188 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012189 if (res < 0) {
12190 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012191 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012192 return (XML_PARSER_EOF);
12193 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012194 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012195#ifdef DEBUG_PUSH
12196 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12197#endif
12198
Owen Taylor3473f882001-02-23 17:55:21 +000012199 } else if (ctxt->instate != XML_PARSER_EOF) {
12200 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12201 xmlParserInputBufferPtr in = ctxt->input->buf;
12202 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12203 (in->raw != NULL)) {
12204 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012205 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12206 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012207
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012208 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012209 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012210 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012211 xmlGenericError(xmlGenericErrorContext,
12212 "xmlParseChunk: encoder error\n");
12213 return(XML_ERR_INVALID_ENCODING);
12214 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012215 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012216 }
12217 }
12218 }
Daniel Veillard65686452012-07-19 18:25:01 +080012219 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012220 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012221 } else {
12222 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12223 avail = xmlBufUse(ctxt->input->buf->buffer);
12224 /*
12225 * Depending on the current state it may not be such
12226 * a good idea to try parsing if there is nothing in the chunk
12227 * which would be worth doing a parser state transition and we
12228 * need to wait for more data
12229 */
12230 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12231 (old_avail == 0) || (avail == 0) ||
12232 (xmlParseCheckTransition(ctxt,
12233 (const char *)&ctxt->input->base[old_avail],
12234 avail - old_avail)))
12235 xmlParseTryOrFinish(ctxt, terminate);
12236 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012237 if (ctxt->instate == XML_PARSER_EOF)
12238 return(ctxt->errNo);
12239
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012240 if ((ctxt->input != NULL) &&
12241 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12242 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12243 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12244 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012245 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012246 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012247 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12248 return(ctxt->errNo);
12249
12250 if (remain != 0) {
12251 chunk += size;
12252 size = remain;
12253 remain = 0;
12254 goto xmldecl_done;
12255 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012256 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12257 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012258 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12259 ctxt->input);
12260 size_t current = ctxt->input->cur - ctxt->input->base;
12261
Daniel Veillarda617e242006-01-09 14:38:44 +000012262 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012263
12264 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12265 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012266 }
Owen Taylor3473f882001-02-23 17:55:21 +000012267 if (terminate) {
12268 /*
12269 * Check for termination
12270 */
Daniel Veillard65686452012-07-19 18:25:01 +080012271 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012272
12273 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012274 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012275 cur_avail = ctxt->input->length -
12276 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012277 else
Daniel Veillard65686452012-07-19 18:25:01 +080012278 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12279 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012280 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012281
Owen Taylor3473f882001-02-23 17:55:21 +000012282 if ((ctxt->instate != XML_PARSER_EOF) &&
12283 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012284 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012285 }
Daniel Veillard65686452012-07-19 18:25:01 +080012286 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012287 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012288 }
Owen Taylor3473f882001-02-23 17:55:21 +000012289 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012290 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012291 ctxt->sax->endDocument(ctxt->userData);
12292 }
12293 ctxt->instate = XML_PARSER_EOF;
12294 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012295 if (ctxt->wellFormed == 0)
12296 return((xmlParserErrors) ctxt->errNo);
12297 else
12298 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012299}
12300
12301/************************************************************************
12302 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012303 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012304 * *
12305 ************************************************************************/
12306
12307/**
Owen Taylor3473f882001-02-23 17:55:21 +000012308 * xmlCreatePushParserCtxt:
12309 * @sax: a SAX handler
12310 * @user_data: The user data returned on SAX callbacks
12311 * @chunk: a pointer to an array of chars
12312 * @size: number of chars in the array
12313 * @filename: an optional file name or URI
12314 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012315 * Create a parser context for using the XML parser in push mode.
12316 * If @buffer and @size are non-NULL, the data is used to detect
12317 * the encoding. The remaining characters will be parsed so they
12318 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012319 * To allow content encoding detection, @size should be >= 4
12320 * The value of @filename is used for fetching external entities
12321 * and error/warning reports.
12322 *
12323 * Returns the new parser context or NULL
12324 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012325
Owen Taylor3473f882001-02-23 17:55:21 +000012326xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012327xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012328 const char *chunk, int size, const char *filename) {
12329 xmlParserCtxtPtr ctxt;
12330 xmlParserInputPtr inputStream;
12331 xmlParserInputBufferPtr buf;
12332 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12333
12334 /*
12335 * plug some encoding conversion routines
12336 */
12337 if ((chunk != NULL) && (size >= 4))
12338 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12339
12340 buf = xmlAllocParserInputBuffer(enc);
12341 if (buf == NULL) return(NULL);
12342
12343 ctxt = xmlNewParserCtxt();
12344 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012345 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012346 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012347 return(NULL);
12348 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012349 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012350 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12351 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012352 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012353 xmlFreeParserInputBuffer(buf);
12354 xmlFreeParserCtxt(ctxt);
12355 return(NULL);
12356 }
Owen Taylor3473f882001-02-23 17:55:21 +000012357 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012358#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012359 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012360#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012361 xmlFree(ctxt->sax);
12362 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12363 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012364 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012365 xmlFreeParserInputBuffer(buf);
12366 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012367 return(NULL);
12368 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012369 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12370 if (sax->initialized == XML_SAX2_MAGIC)
12371 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12372 else
12373 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012374 if (user_data != NULL)
12375 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012376 }
Owen Taylor3473f882001-02-23 17:55:21 +000012377 if (filename == NULL) {
12378 ctxt->directory = NULL;
12379 } else {
12380 ctxt->directory = xmlParserGetDirectory(filename);
12381 }
12382
12383 inputStream = xmlNewInputStream(ctxt);
12384 if (inputStream == NULL) {
12385 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012386 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012387 return(NULL);
12388 }
12389
12390 if (filename == NULL)
12391 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012392 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012393 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012394 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012395 if (inputStream->filename == NULL) {
12396 xmlFreeParserCtxt(ctxt);
12397 xmlFreeParserInputBuffer(buf);
12398 return(NULL);
12399 }
12400 }
Owen Taylor3473f882001-02-23 17:55:21 +000012401 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012402 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012403 inputPush(ctxt, inputStream);
12404
William M. Brack3a1cd212005-02-11 14:35:54 +000012405 /*
12406 * If the caller didn't provide an initial 'chunk' for determining
12407 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12408 * that it can be automatically determined later
12409 */
12410 if ((size == 0) || (chunk == NULL)) {
12411 ctxt->charset = XML_CHAR_ENCODING_NONE;
12412 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012413 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12414 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012415
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012416 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012417
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012418 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012419#ifdef DEBUG_PUSH
12420 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12421#endif
12422 }
12423
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012424 if (enc != XML_CHAR_ENCODING_NONE) {
12425 xmlSwitchEncoding(ctxt, enc);
12426 }
12427
Owen Taylor3473f882001-02-23 17:55:21 +000012428 return(ctxt);
12429}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012430#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012431
12432/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012433 * xmlHaltParser:
12434 * @ctxt: an XML parser context
12435 *
12436 * Blocks further parser processing don't override error
12437 * for internal use
12438 */
12439static void
12440xmlHaltParser(xmlParserCtxtPtr ctxt) {
12441 if (ctxt == NULL)
12442 return;
12443 ctxt->instate = XML_PARSER_EOF;
12444 ctxt->disableSAX = 1;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012445 while (ctxt->inputNr > 1)
12446 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012447 if (ctxt->input != NULL) {
12448 /*
12449 * in case there was a specific allocation deallocate before
12450 * overriding base
12451 */
12452 if (ctxt->input->free != NULL) {
12453 ctxt->input->free((xmlChar *) ctxt->input->base);
12454 ctxt->input->free = NULL;
12455 }
12456 ctxt->input->cur = BAD_CAST"";
12457 ctxt->input->base = ctxt->input->cur;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012458 ctxt->input->end = ctxt->input->cur;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012459 }
12460}
12461
12462/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012463 * xmlStopParser:
12464 * @ctxt: an XML parser context
12465 *
12466 * Blocks further parser processing
12467 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012468void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012469xmlStopParser(xmlParserCtxtPtr ctxt) {
12470 if (ctxt == NULL)
12471 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012472 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012473 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012474}
12475
12476/**
Owen Taylor3473f882001-02-23 17:55:21 +000012477 * xmlCreateIOParserCtxt:
12478 * @sax: a SAX handler
12479 * @user_data: The user data returned on SAX callbacks
12480 * @ioread: an I/O read function
12481 * @ioclose: an I/O close function
12482 * @ioctx: an I/O handler
12483 * @enc: the charset encoding if known
12484 *
12485 * Create a parser context for using the XML parser with an existing
12486 * I/O stream
12487 *
12488 * Returns the new parser context or NULL
12489 */
12490xmlParserCtxtPtr
12491xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12492 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12493 void *ioctx, xmlCharEncoding enc) {
12494 xmlParserCtxtPtr ctxt;
12495 xmlParserInputPtr inputStream;
12496 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012497
Daniel Veillard42595322004-11-08 10:52:06 +000012498 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012499
12500 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012501 if (buf == NULL) {
12502 if (ioclose != NULL)
12503 ioclose(ioctx);
12504 return (NULL);
12505 }
Owen Taylor3473f882001-02-23 17:55:21 +000012506
12507 ctxt = xmlNewParserCtxt();
12508 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012509 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012510 return(NULL);
12511 }
12512 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012513#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012514 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012515#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012516 xmlFree(ctxt->sax);
12517 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12518 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012519 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012520 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012521 return(NULL);
12522 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012523 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12524 if (sax->initialized == XML_SAX2_MAGIC)
12525 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12526 else
12527 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012528 if (user_data != NULL)
12529 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012530 }
Owen Taylor3473f882001-02-23 17:55:21 +000012531
12532 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12533 if (inputStream == NULL) {
12534 xmlFreeParserCtxt(ctxt);
12535 return(NULL);
12536 }
12537 inputPush(ctxt, inputStream);
12538
12539 return(ctxt);
12540}
12541
Daniel Veillard4432df22003-09-28 18:58:27 +000012542#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012543/************************************************************************
12544 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012545 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012546 * *
12547 ************************************************************************/
12548
12549/**
12550 * xmlIOParseDTD:
12551 * @sax: the SAX handler block or NULL
12552 * @input: an Input Buffer
12553 * @enc: the charset encoding if known
12554 *
12555 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012556 *
Owen Taylor3473f882001-02-23 17:55:21 +000012557 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012558 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012559 */
12560
12561xmlDtdPtr
12562xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12563 xmlCharEncoding enc) {
12564 xmlDtdPtr ret = NULL;
12565 xmlParserCtxtPtr ctxt;
12566 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012567 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012568
12569 if (input == NULL)
12570 return(NULL);
12571
12572 ctxt = xmlNewParserCtxt();
12573 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012574 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012575 return(NULL);
12576 }
12577
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012578 /* We are loading a DTD */
12579 ctxt->options |= XML_PARSE_DTDLOAD;
12580
Owen Taylor3473f882001-02-23 17:55:21 +000012581 /*
12582 * Set-up the SAX context
12583 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012584 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012585 if (ctxt->sax != NULL)
12586 xmlFree(ctxt->sax);
12587 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012588 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012589 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012590 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012591
12592 /*
12593 * generate a parser input from the I/O handler
12594 */
12595
Daniel Veillard43caefb2003-12-07 19:32:22 +000012596 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012597 if (pinput == NULL) {
12598 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012599 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012600 xmlFreeParserCtxt(ctxt);
12601 return(NULL);
12602 }
12603
12604 /*
12605 * plug some encoding conversion routines here.
12606 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012607 if (xmlPushInput(ctxt, pinput) < 0) {
12608 if (sax != NULL) ctxt->sax = NULL;
12609 xmlFreeParserCtxt(ctxt);
12610 return(NULL);
12611 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012612 if (enc != XML_CHAR_ENCODING_NONE) {
12613 xmlSwitchEncoding(ctxt, enc);
12614 }
Owen Taylor3473f882001-02-23 17:55:21 +000012615
12616 pinput->filename = NULL;
12617 pinput->line = 1;
12618 pinput->col = 1;
12619 pinput->base = ctxt->input->cur;
12620 pinput->cur = ctxt->input->cur;
12621 pinput->free = NULL;
12622
12623 /*
12624 * let's parse that entity knowing it's an external subset.
12625 */
12626 ctxt->inSubset = 2;
12627 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012628 if (ctxt->myDoc == NULL) {
12629 xmlErrMemory(ctxt, "New Doc failed");
12630 return(NULL);
12631 }
12632 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012633 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12634 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012635
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012636 if ((enc == XML_CHAR_ENCODING_NONE) &&
12637 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012638 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012639 * Get the 4 first bytes and decode the charset
12640 * if enc != XML_CHAR_ENCODING_NONE
12641 * plug some encoding conversion routines.
12642 */
12643 start[0] = RAW;
12644 start[1] = NXT(1);
12645 start[2] = NXT(2);
12646 start[3] = NXT(3);
12647 enc = xmlDetectCharEncoding(start, 4);
12648 if (enc != XML_CHAR_ENCODING_NONE) {
12649 xmlSwitchEncoding(ctxt, enc);
12650 }
12651 }
12652
Owen Taylor3473f882001-02-23 17:55:21 +000012653 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12654
12655 if (ctxt->myDoc != NULL) {
12656 if (ctxt->wellFormed) {
12657 ret = ctxt->myDoc->extSubset;
12658 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012659 if (ret != NULL) {
12660 xmlNodePtr tmp;
12661
12662 ret->doc = NULL;
12663 tmp = ret->children;
12664 while (tmp != NULL) {
12665 tmp->doc = NULL;
12666 tmp = tmp->next;
12667 }
12668 }
Owen Taylor3473f882001-02-23 17:55:21 +000012669 } else {
12670 ret = NULL;
12671 }
12672 xmlFreeDoc(ctxt->myDoc);
12673 ctxt->myDoc = NULL;
12674 }
12675 if (sax != NULL) ctxt->sax = NULL;
12676 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012677
Owen Taylor3473f882001-02-23 17:55:21 +000012678 return(ret);
12679}
12680
12681/**
12682 * xmlSAXParseDTD:
12683 * @sax: the SAX handler block
12684 * @ExternalID: a NAME* containing the External ID of the DTD
12685 * @SystemID: a NAME* containing the URL to the DTD
12686 *
12687 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012688 *
Owen Taylor3473f882001-02-23 17:55:21 +000012689 * Returns the resulting xmlDtdPtr or NULL in case of error.
12690 */
12691
12692xmlDtdPtr
12693xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12694 const xmlChar *SystemID) {
12695 xmlDtdPtr ret = NULL;
12696 xmlParserCtxtPtr ctxt;
12697 xmlParserInputPtr input = NULL;
12698 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012699 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012700
12701 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12702
12703 ctxt = xmlNewParserCtxt();
12704 if (ctxt == NULL) {
12705 return(NULL);
12706 }
12707
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012708 /* We are loading a DTD */
12709 ctxt->options |= XML_PARSE_DTDLOAD;
12710
Owen Taylor3473f882001-02-23 17:55:21 +000012711 /*
12712 * Set-up the SAX context
12713 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012714 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012715 if (ctxt->sax != NULL)
12716 xmlFree(ctxt->sax);
12717 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012718 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012719 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012720
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012721 /*
12722 * Canonicalise the system ID
12723 */
12724 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012725 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012726 xmlFreeParserCtxt(ctxt);
12727 return(NULL);
12728 }
Owen Taylor3473f882001-02-23 17:55:21 +000012729
12730 /*
12731 * Ask the Entity resolver to load the damn thing
12732 */
12733
12734 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012735 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12736 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012737 if (input == NULL) {
12738 if (sax != NULL) ctxt->sax = NULL;
12739 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012740 if (systemIdCanonic != NULL)
12741 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012742 return(NULL);
12743 }
12744
12745 /*
12746 * plug some encoding conversion routines here.
12747 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012748 if (xmlPushInput(ctxt, input) < 0) {
12749 if (sax != NULL) ctxt->sax = NULL;
12750 xmlFreeParserCtxt(ctxt);
12751 if (systemIdCanonic != NULL)
12752 xmlFree(systemIdCanonic);
12753 return(NULL);
12754 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012755 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12756 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12757 xmlSwitchEncoding(ctxt, enc);
12758 }
Owen Taylor3473f882001-02-23 17:55:21 +000012759
12760 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012761 input->filename = (char *) systemIdCanonic;
12762 else
12763 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012764 input->line = 1;
12765 input->col = 1;
12766 input->base = ctxt->input->cur;
12767 input->cur = ctxt->input->cur;
12768 input->free = NULL;
12769
12770 /*
12771 * let's parse that entity knowing it's an external subset.
12772 */
12773 ctxt->inSubset = 2;
12774 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012775 if (ctxt->myDoc == NULL) {
12776 xmlErrMemory(ctxt, "New Doc failed");
12777 if (sax != NULL) ctxt->sax = NULL;
12778 xmlFreeParserCtxt(ctxt);
12779 return(NULL);
12780 }
12781 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012782 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12783 ExternalID, SystemID);
12784 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12785
12786 if (ctxt->myDoc != NULL) {
12787 if (ctxt->wellFormed) {
12788 ret = ctxt->myDoc->extSubset;
12789 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012790 if (ret != NULL) {
12791 xmlNodePtr tmp;
12792
12793 ret->doc = NULL;
12794 tmp = ret->children;
12795 while (tmp != NULL) {
12796 tmp->doc = NULL;
12797 tmp = tmp->next;
12798 }
12799 }
Owen Taylor3473f882001-02-23 17:55:21 +000012800 } else {
12801 ret = NULL;
12802 }
12803 xmlFreeDoc(ctxt->myDoc);
12804 ctxt->myDoc = NULL;
12805 }
12806 if (sax != NULL) ctxt->sax = NULL;
12807 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012808
Owen Taylor3473f882001-02-23 17:55:21 +000012809 return(ret);
12810}
12811
Daniel Veillard4432df22003-09-28 18:58:27 +000012812
Owen Taylor3473f882001-02-23 17:55:21 +000012813/**
12814 * xmlParseDTD:
12815 * @ExternalID: a NAME* containing the External ID of the DTD
12816 * @SystemID: a NAME* containing the URL to the DTD
12817 *
12818 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012819 *
Owen Taylor3473f882001-02-23 17:55:21 +000012820 * Returns the resulting xmlDtdPtr or NULL in case of error.
12821 */
12822
12823xmlDtdPtr
12824xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12825 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12826}
Daniel Veillard4432df22003-09-28 18:58:27 +000012827#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012828
12829/************************************************************************
12830 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012831 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012832 * *
12833 ************************************************************************/
12834
12835/**
Owen Taylor3473f882001-02-23 17:55:21 +000012836 * xmlParseCtxtExternalEntity:
12837 * @ctx: the existing parsing context
12838 * @URL: the URL for the entity to load
12839 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012840 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012841 *
12842 * Parse an external general entity within an existing parsing context
12843 * An external general parsed entity is well-formed if it matches the
12844 * production labeled extParsedEnt.
12845 *
12846 * [78] extParsedEnt ::= TextDecl? content
12847 *
12848 * Returns 0 if the entity is well formed, -1 in case of args problem and
12849 * the parser error code otherwise
12850 */
12851
12852int
12853xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012854 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012855 xmlParserCtxtPtr ctxt;
12856 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012857 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012858 xmlSAXHandlerPtr oldsax = NULL;
12859 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012860 xmlChar start[4];
12861 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012862
Daniel Veillardce682bc2004-11-05 17:22:25 +000012863 if (ctx == NULL) return(-1);
12864
Daniel Veillard0161e632008-08-28 15:36:32 +000012865 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12866 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012867 return(XML_ERR_ENTITY_LOOP);
12868 }
12869
Daniel Veillardcda96922001-08-21 10:56:31 +000012870 if (lst != NULL)
12871 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012872 if ((URL == NULL) && (ID == NULL))
12873 return(-1);
12874 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12875 return(-1);
12876
Rob Richards798743a2009-06-19 13:54:25 -040012877 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012878 if (ctxt == NULL) {
12879 return(-1);
12880 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012881
Owen Taylor3473f882001-02-23 17:55:21 +000012882 oldsax = ctxt->sax;
12883 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012884 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012885 newDoc = xmlNewDoc(BAD_CAST "1.0");
12886 if (newDoc == NULL) {
12887 xmlFreeParserCtxt(ctxt);
12888 return(-1);
12889 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012890 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012891 if (ctx->myDoc->dict) {
12892 newDoc->dict = ctx->myDoc->dict;
12893 xmlDictReference(newDoc->dict);
12894 }
Owen Taylor3473f882001-02-23 17:55:21 +000012895 if (ctx->myDoc != NULL) {
12896 newDoc->intSubset = ctx->myDoc->intSubset;
12897 newDoc->extSubset = ctx->myDoc->extSubset;
12898 }
12899 if (ctx->myDoc->URL != NULL) {
12900 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12901 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012902 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12903 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012904 ctxt->sax = oldsax;
12905 xmlFreeParserCtxt(ctxt);
12906 newDoc->intSubset = NULL;
12907 newDoc->extSubset = NULL;
12908 xmlFreeDoc(newDoc);
12909 return(-1);
12910 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012911 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012912 nodePush(ctxt, newDoc->children);
12913 if (ctx->myDoc == NULL) {
12914 ctxt->myDoc = newDoc;
12915 } else {
12916 ctxt->myDoc = ctx->myDoc;
12917 newDoc->children->doc = ctx->myDoc;
12918 }
12919
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012920 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012921 * Get the 4 first bytes and decode the charset
12922 * if enc != XML_CHAR_ENCODING_NONE
12923 * plug some encoding conversion routines.
12924 */
12925 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012926 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12927 start[0] = RAW;
12928 start[1] = NXT(1);
12929 start[2] = NXT(2);
12930 start[3] = NXT(3);
12931 enc = xmlDetectCharEncoding(start, 4);
12932 if (enc != XML_CHAR_ENCODING_NONE) {
12933 xmlSwitchEncoding(ctxt, enc);
12934 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012935 }
12936
Owen Taylor3473f882001-02-23 17:55:21 +000012937 /*
12938 * Parse a possible text declaration first
12939 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012940 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012941 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012942 /*
12943 * An XML-1.0 document can't reference an entity not XML-1.0
12944 */
12945 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12946 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012947 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012948 "Version mismatch between document and entity\n");
12949 }
Owen Taylor3473f882001-02-23 17:55:21 +000012950 }
12951
12952 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012953 * If the user provided its own SAX callbacks then reuse the
12954 * useData callback field, otherwise the expected setup in a
12955 * DOM builder is to have userData == ctxt
12956 */
12957 if (ctx->userData == ctx)
12958 ctxt->userData = ctxt;
12959 else
12960 ctxt->userData = ctx->userData;
12961
12962 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012963 * Doing validity checking on chunk doesn't make sense
12964 */
12965 ctxt->instate = XML_PARSER_CONTENT;
12966 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012967 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012968 ctxt->loadsubset = ctx->loadsubset;
12969 ctxt->depth = ctx->depth + 1;
12970 ctxt->replaceEntities = ctx->replaceEntities;
12971 if (ctxt->validate) {
12972 ctxt->vctxt.error = ctx->vctxt.error;
12973 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012974 } else {
12975 ctxt->vctxt.error = NULL;
12976 ctxt->vctxt.warning = NULL;
12977 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012978 ctxt->vctxt.nodeTab = NULL;
12979 ctxt->vctxt.nodeNr = 0;
12980 ctxt->vctxt.nodeMax = 0;
12981 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012982 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12983 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012984 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12985 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12986 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012987 ctxt->dictNames = ctx->dictNames;
12988 ctxt->attsDefault = ctx->attsDefault;
12989 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012990 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012991
12992 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012993
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012994 ctx->validate = ctxt->validate;
12995 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012996 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012997 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012998 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012999 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013000 }
13001 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013002 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013003 }
13004
13005 if (!ctxt->wellFormed) {
13006 if (ctxt->errNo == 0)
13007 ret = 1;
13008 else
13009 ret = ctxt->errNo;
13010 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013011 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013012 xmlNodePtr cur;
13013
13014 /*
13015 * Return the newly created nodeset after unlinking it from
13016 * they pseudo parent.
13017 */
13018 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013019 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013020 while (cur != NULL) {
13021 cur->parent = NULL;
13022 cur = cur->next;
13023 }
13024 newDoc->children->children = NULL;
13025 }
13026 ret = 0;
13027 }
13028 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013029 ctxt->dict = NULL;
13030 ctxt->attsDefault = NULL;
13031 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013032 xmlFreeParserCtxt(ctxt);
13033 newDoc->intSubset = NULL;
13034 newDoc->extSubset = NULL;
13035 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013036
Owen Taylor3473f882001-02-23 17:55:21 +000013037 return(ret);
13038}
13039
13040/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013041 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013042 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013043 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013044 * @sax: the SAX handler bloc (possibly NULL)
13045 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13046 * @depth: Used for loop detection, use 0
13047 * @URL: the URL for the entity to load
13048 * @ID: the System ID for the entity to load
13049 * @list: the return value for the set of parsed nodes
13050 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013051 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013052 *
13053 * Returns 0 if the entity is well formed, -1 in case of args problem and
13054 * the parser error code otherwise
13055 */
13056
Daniel Veillard7d515752003-09-26 19:12:37 +000013057static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013058xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13059 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013060 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013061 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013062 xmlParserCtxtPtr ctxt;
13063 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013064 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013065 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013066 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013067 xmlChar start[4];
13068 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013069
Daniel Veillard0161e632008-08-28 15:36:32 +000013070 if (((depth > 40) &&
13071 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13072 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013073 return(XML_ERR_ENTITY_LOOP);
13074 }
13075
Owen Taylor3473f882001-02-23 17:55:21 +000013076 if (list != NULL)
13077 *list = NULL;
13078 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013079 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013080 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013081 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013082
13083
Rob Richards9c0aa472009-03-26 18:10:19 +000013084 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013085 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013086 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013087 if (oldctxt != NULL) {
13088 ctxt->_private = oldctxt->_private;
13089 ctxt->loadsubset = oldctxt->loadsubset;
13090 ctxt->validate = oldctxt->validate;
13091 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013092 ctxt->record_info = oldctxt->record_info;
13093 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13094 ctxt->node_seq.length = oldctxt->node_seq.length;
13095 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013096 } else {
13097 /*
13098 * Doing validity checking on chunk without context
13099 * doesn't make sense
13100 */
13101 ctxt->_private = NULL;
13102 ctxt->validate = 0;
13103 ctxt->external = 2;
13104 ctxt->loadsubset = 0;
13105 }
Owen Taylor3473f882001-02-23 17:55:21 +000013106 if (sax != NULL) {
13107 oldsax = ctxt->sax;
13108 ctxt->sax = sax;
13109 if (user_data != NULL)
13110 ctxt->userData = user_data;
13111 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013112 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013113 newDoc = xmlNewDoc(BAD_CAST "1.0");
13114 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013115 ctxt->node_seq.maximum = 0;
13116 ctxt->node_seq.length = 0;
13117 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013118 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013119 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013120 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013121 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013122 newDoc->intSubset = doc->intSubset;
13123 newDoc->extSubset = doc->extSubset;
13124 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013125 xmlDictReference(newDoc->dict);
13126
Owen Taylor3473f882001-02-23 17:55:21 +000013127 if (doc->URL != NULL) {
13128 newDoc->URL = xmlStrdup(doc->URL);
13129 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013130 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13131 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013132 if (sax != NULL)
13133 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013134 ctxt->node_seq.maximum = 0;
13135 ctxt->node_seq.length = 0;
13136 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013137 xmlFreeParserCtxt(ctxt);
13138 newDoc->intSubset = NULL;
13139 newDoc->extSubset = NULL;
13140 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013141 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013142 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013143 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013144 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013145 ctxt->myDoc = doc;
13146 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013147
Daniel Veillard0161e632008-08-28 15:36:32 +000013148 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013149 * Get the 4 first bytes and decode the charset
13150 * if enc != XML_CHAR_ENCODING_NONE
13151 * plug some encoding conversion routines.
13152 */
13153 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013154 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13155 start[0] = RAW;
13156 start[1] = NXT(1);
13157 start[2] = NXT(2);
13158 start[3] = NXT(3);
13159 enc = xmlDetectCharEncoding(start, 4);
13160 if (enc != XML_CHAR_ENCODING_NONE) {
13161 xmlSwitchEncoding(ctxt, enc);
13162 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013163 }
13164
Owen Taylor3473f882001-02-23 17:55:21 +000013165 /*
13166 * Parse a possible text declaration first
13167 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013168 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013169 xmlParseTextDecl(ctxt);
13170 }
13171
Owen Taylor3473f882001-02-23 17:55:21 +000013172 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013173 ctxt->depth = depth;
13174
13175 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013176
Daniel Veillard561b7f82002-03-20 21:55:57 +000013177 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013178 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013179 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013180 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013181 }
13182 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013183 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013184 }
13185
13186 if (!ctxt->wellFormed) {
13187 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013188 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013189 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013190 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013191 } else {
13192 if (list != NULL) {
13193 xmlNodePtr cur;
13194
13195 /*
13196 * Return the newly created nodeset after unlinking it from
13197 * they pseudo parent.
13198 */
13199 cur = newDoc->children->children;
13200 *list = cur;
13201 while (cur != NULL) {
13202 cur->parent = NULL;
13203 cur = cur->next;
13204 }
13205 newDoc->children->children = NULL;
13206 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013207 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013208 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013209
13210 /*
13211 * Record in the parent context the number of entities replacement
13212 * done when parsing that reference.
13213 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013214 if (oldctxt != NULL)
13215 oldctxt->nbentities += ctxt->nbentities;
13216
Daniel Veillard0161e632008-08-28 15:36:32 +000013217 /*
13218 * Also record the size of the entity parsed
13219 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013220 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013221 oldctxt->sizeentities += ctxt->input->consumed;
13222 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13223 }
13224 /*
13225 * And record the last error if any
13226 */
Nick Wellnhofer3eef3f32017-06-20 16:13:57 +020013227 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
Daniel Veillard0161e632008-08-28 15:36:32 +000013228 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13229
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013230 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013231 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013232 if (oldctxt != NULL) {
13233 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13234 oldctxt->node_seq.length = ctxt->node_seq.length;
13235 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13236 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013237 ctxt->node_seq.maximum = 0;
13238 ctxt->node_seq.length = 0;
13239 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013240 xmlFreeParserCtxt(ctxt);
13241 newDoc->intSubset = NULL;
13242 newDoc->extSubset = NULL;
13243 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013244
Owen Taylor3473f882001-02-23 17:55:21 +000013245 return(ret);
13246}
13247
Daniel Veillard81273902003-09-30 00:43:48 +000013248#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013249/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013250 * xmlParseExternalEntity:
13251 * @doc: the document the chunk pertains to
13252 * @sax: the SAX handler bloc (possibly NULL)
13253 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13254 * @depth: Used for loop detection, use 0
13255 * @URL: the URL for the entity to load
13256 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013257 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013258 *
13259 * Parse an external general entity
13260 * An external general parsed entity is well-formed if it matches the
13261 * production labeled extParsedEnt.
13262 *
13263 * [78] extParsedEnt ::= TextDecl? content
13264 *
13265 * Returns 0 if the entity is well formed, -1 in case of args problem and
13266 * the parser error code otherwise
13267 */
13268
13269int
13270xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013271 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013272 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013273 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013274}
13275
13276/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013277 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013278 * @doc: the document the chunk pertains to
13279 * @sax: the SAX handler bloc (possibly NULL)
13280 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13281 * @depth: Used for loop detection, use 0
13282 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013283 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013284 *
13285 * Parse a well-balanced chunk of an XML document
13286 * called by the parser
13287 * The allowed sequence for the Well Balanced Chunk is the one defined by
13288 * the content production in the XML grammar:
13289 *
13290 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13291 *
13292 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13293 * the parser error code otherwise
13294 */
13295
13296int
13297xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013298 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013299 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13300 depth, string, lst, 0 );
13301}
Daniel Veillard81273902003-09-30 00:43:48 +000013302#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013303
13304/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013305 * xmlParseBalancedChunkMemoryInternal:
13306 * @oldctxt: the existing parsing context
13307 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13308 * @user_data: the user data field for the parser context
13309 * @lst: the return value for the set of parsed nodes
13310 *
13311 *
13312 * Parse a well-balanced chunk of an XML document
13313 * called by the parser
13314 * The allowed sequence for the Well Balanced Chunk is the one defined by
13315 * the content production in the XML grammar:
13316 *
13317 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13318 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013319 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13320 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013321 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013322 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013323 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013324 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013325static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013326xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13327 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13328 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013329 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013330 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013331 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013332 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013333 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013334 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013335 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013336#ifdef SAX2
13337 int i;
13338#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013339
Daniel Veillard0161e632008-08-28 15:36:32 +000013340 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13341 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013342 return(XML_ERR_ENTITY_LOOP);
13343 }
13344
13345
13346 if (lst != NULL)
13347 *lst = NULL;
13348 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013349 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013350
13351 size = xmlStrlen(string);
13352
13353 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013354 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013355 if (user_data != NULL)
13356 ctxt->userData = user_data;
13357 else
13358 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013359 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13360 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013361 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13362 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13363 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013364
Daniel Veillard74eaec12009-08-26 15:57:20 +020013365#ifdef SAX2
13366 /* propagate namespaces down the entity */
13367 for (i = 0;i < oldctxt->nsNr;i += 2) {
13368 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13369 }
13370#endif
13371
Daniel Veillard328f48c2002-11-15 15:24:34 +000013372 oldsax = ctxt->sax;
13373 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013374 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013375 ctxt->replaceEntities = oldctxt->replaceEntities;
13376 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013377
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013378 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013379 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013380 newDoc = xmlNewDoc(BAD_CAST "1.0");
13381 if (newDoc == NULL) {
13382 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013383 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013384 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013385 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013386 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013387 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013388 newDoc->dict = ctxt->dict;
13389 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013390 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013391 } else {
13392 ctxt->myDoc = oldctxt->myDoc;
13393 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013394 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013395 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013396 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13397 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013398 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013399 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013400 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013401 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013402 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013403 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013404 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013405 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013406 ctxt->myDoc->children = NULL;
13407 ctxt->myDoc->last = NULL;
13408 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013409 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013410 ctxt->instate = XML_PARSER_CONTENT;
13411 ctxt->depth = oldctxt->depth + 1;
13412
Daniel Veillard328f48c2002-11-15 15:24:34 +000013413 ctxt->validate = 0;
13414 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013415 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13416 /*
13417 * ID/IDREF registration will be done in xmlValidateElement below
13418 */
13419 ctxt->loadsubset |= XML_SKIP_IDS;
13420 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013421 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013422 ctxt->attsDefault = oldctxt->attsDefault;
13423 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013424
Daniel Veillard68e9e742002-11-16 15:35:11 +000013425 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013426 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013427 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013428 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013429 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013430 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013431 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013432 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013433 }
13434
13435 if (!ctxt->wellFormed) {
13436 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013437 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013438 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013439 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013440 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013441 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013442 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013443
William M. Brack7b9154b2003-09-27 19:23:50 +000013444 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013445 xmlNodePtr cur;
13446
13447 /*
13448 * Return the newly created nodeset after unlinking it from
13449 * they pseudo parent.
13450 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013451 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013452 *lst = cur;
13453 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013454#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013455 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13456 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13457 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013458 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13459 oldctxt->myDoc, cur);
13460 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013461#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013462 cur->parent = NULL;
13463 cur = cur->next;
13464 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013465 ctxt->myDoc->children->children = NULL;
13466 }
13467 if (ctxt->myDoc != NULL) {
13468 xmlFreeNode(ctxt->myDoc->children);
13469 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013470 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013471 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013472
13473 /*
13474 * Record in the parent context the number of entities replacement
13475 * done when parsing that reference.
13476 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013477 if (oldctxt != NULL)
13478 oldctxt->nbentities += ctxt->nbentities;
13479
Daniel Veillard0161e632008-08-28 15:36:32 +000013480 /*
13481 * Also record the last error if any
13482 */
13483 if (ctxt->lastError.code != XML_ERR_OK)
13484 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13485
Daniel Veillard328f48c2002-11-15 15:24:34 +000013486 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013487 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013488 ctxt->attsDefault = NULL;
13489 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013490 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013491 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013492 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013493 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013494
Daniel Veillard328f48c2002-11-15 15:24:34 +000013495 return(ret);
13496}
13497
Daniel Veillard29b17482004-08-16 00:39:03 +000013498/**
13499 * xmlParseInNodeContext:
13500 * @node: the context node
13501 * @data: the input string
13502 * @datalen: the input string length in bytes
13503 * @options: a combination of xmlParserOption
13504 * @lst: the return value for the set of parsed nodes
13505 *
13506 * Parse a well-balanced chunk of an XML document
13507 * within the context (DTD, namespaces, etc ...) of the given node.
13508 *
13509 * The allowed sequence for the data is a Well Balanced Chunk defined by
13510 * the content production in the XML grammar:
13511 *
13512 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13513 *
13514 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13515 * error code otherwise
13516 */
13517xmlParserErrors
13518xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13519 int options, xmlNodePtr *lst) {
13520#ifdef SAX2
13521 xmlParserCtxtPtr ctxt;
13522 xmlDocPtr doc = NULL;
13523 xmlNodePtr fake, cur;
13524 int nsnr = 0;
13525
13526 xmlParserErrors ret = XML_ERR_OK;
13527
13528 /*
13529 * check all input parameters, grab the document
13530 */
13531 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13532 return(XML_ERR_INTERNAL_ERROR);
13533 switch (node->type) {
13534 case XML_ELEMENT_NODE:
13535 case XML_ATTRIBUTE_NODE:
13536 case XML_TEXT_NODE:
13537 case XML_CDATA_SECTION_NODE:
13538 case XML_ENTITY_REF_NODE:
13539 case XML_PI_NODE:
13540 case XML_COMMENT_NODE:
13541 case XML_DOCUMENT_NODE:
13542 case XML_HTML_DOCUMENT_NODE:
13543 break;
13544 default:
13545 return(XML_ERR_INTERNAL_ERROR);
13546
13547 }
13548 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13549 (node->type != XML_DOCUMENT_NODE) &&
13550 (node->type != XML_HTML_DOCUMENT_NODE))
13551 node = node->parent;
13552 if (node == NULL)
13553 return(XML_ERR_INTERNAL_ERROR);
13554 if (node->type == XML_ELEMENT_NODE)
13555 doc = node->doc;
13556 else
13557 doc = (xmlDocPtr) node;
13558 if (doc == NULL)
13559 return(XML_ERR_INTERNAL_ERROR);
13560
13561 /*
13562 * allocate a context and set-up everything not related to the
13563 * node position in the tree
13564 */
13565 if (doc->type == XML_DOCUMENT_NODE)
13566 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13567#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013568 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013569 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013570 /*
13571 * When parsing in context, it makes no sense to add implied
13572 * elements like html/body/etc...
13573 */
13574 options |= HTML_PARSE_NOIMPLIED;
13575 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013576#endif
13577 else
13578 return(XML_ERR_INTERNAL_ERROR);
13579
13580 if (ctxt == NULL)
13581 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013582
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013583 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013584 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13585 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13586 * we must wait until the last moment to free the original one.
13587 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013588 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013589 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013590 xmlDictFree(ctxt->dict);
13591 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013592 } else
13593 options |= XML_PARSE_NODICT;
13594
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013595 if (doc->encoding != NULL) {
13596 xmlCharEncodingHandlerPtr hdlr;
13597
13598 if (ctxt->encoding != NULL)
13599 xmlFree((xmlChar *) ctxt->encoding);
13600 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13601
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013602 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013603 if (hdlr != NULL) {
13604 xmlSwitchToEncoding(ctxt, hdlr);
13605 } else {
13606 return(XML_ERR_UNSUPPORTED_ENCODING);
13607 }
13608 }
13609
Daniel Veillard37334572008-07-31 08:20:02 +000013610 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013611 xmlDetectSAX2(ctxt);
13612 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013613 /* parsing in context, i.e. as within existing content */
13614 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013615
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013616 fake = xmlNewComment(NULL);
13617 if (fake == NULL) {
13618 xmlFreeParserCtxt(ctxt);
13619 return(XML_ERR_NO_MEMORY);
13620 }
13621 xmlAddChild(node, fake);
13622
Daniel Veillard29b17482004-08-16 00:39:03 +000013623 if (node->type == XML_ELEMENT_NODE) {
13624 nodePush(ctxt, node);
13625 /*
13626 * initialize the SAX2 namespaces stack
13627 */
13628 cur = node;
13629 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13630 xmlNsPtr ns = cur->nsDef;
13631 const xmlChar *iprefix, *ihref;
13632
13633 while (ns != NULL) {
13634 if (ctxt->dict) {
13635 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13636 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13637 } else {
13638 iprefix = ns->prefix;
13639 ihref = ns->href;
13640 }
13641
13642 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13643 nsPush(ctxt, iprefix, ihref);
13644 nsnr++;
13645 }
13646 ns = ns->next;
13647 }
13648 cur = cur->parent;
13649 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013650 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013651
13652 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13653 /*
13654 * ID/IDREF registration will be done in xmlValidateElement below
13655 */
13656 ctxt->loadsubset |= XML_SKIP_IDS;
13657 }
13658
Daniel Veillard499cc922006-01-18 17:22:35 +000013659#ifdef LIBXML_HTML_ENABLED
13660 if (doc->type == XML_HTML_DOCUMENT_NODE)
13661 __htmlParseContent(ctxt);
13662 else
13663#endif
13664 xmlParseContent(ctxt);
13665
Daniel Veillard29b17482004-08-16 00:39:03 +000013666 nsPop(ctxt, nsnr);
13667 if ((RAW == '<') && (NXT(1) == '/')) {
13668 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13669 } else if (RAW != 0) {
13670 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13671 }
13672 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13673 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13674 ctxt->wellFormed = 0;
13675 }
13676
13677 if (!ctxt->wellFormed) {
13678 if (ctxt->errNo == 0)
13679 ret = XML_ERR_INTERNAL_ERROR;
13680 else
13681 ret = (xmlParserErrors)ctxt->errNo;
13682 } else {
13683 ret = XML_ERR_OK;
13684 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013685
Daniel Veillard29b17482004-08-16 00:39:03 +000013686 /*
13687 * Return the newly created nodeset after unlinking it from
13688 * the pseudo sibling.
13689 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013690
Daniel Veillard29b17482004-08-16 00:39:03 +000013691 cur = fake->next;
13692 fake->next = NULL;
13693 node->last = fake;
13694
13695 if (cur != NULL) {
13696 cur->prev = NULL;
13697 }
13698
13699 *lst = cur;
13700
13701 while (cur != NULL) {
13702 cur->parent = NULL;
13703 cur = cur->next;
13704 }
13705
13706 xmlUnlinkNode(fake);
13707 xmlFreeNode(fake);
13708
13709
13710 if (ret != XML_ERR_OK) {
13711 xmlFreeNodeList(*lst);
13712 *lst = NULL;
13713 }
William M. Brackc3f81342004-10-03 01:22:44 +000013714
William M. Brackb7b54de2004-10-06 16:38:01 +000013715 if (doc->dict != NULL)
13716 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013717 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013718
Daniel Veillard29b17482004-08-16 00:39:03 +000013719 return(ret);
13720#else /* !SAX2 */
13721 return(XML_ERR_INTERNAL_ERROR);
13722#endif
13723}
13724
Daniel Veillard81273902003-09-30 00:43:48 +000013725#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013726/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013727 * xmlParseBalancedChunkMemoryRecover:
13728 * @doc: the document the chunk pertains to
13729 * @sax: the SAX handler bloc (possibly NULL)
13730 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13731 * @depth: Used for loop detection, use 0
13732 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13733 * @lst: the return value for the set of parsed nodes
13734 * @recover: return nodes even if the data is broken (use 0)
13735 *
13736 *
13737 * Parse a well-balanced chunk of an XML document
13738 * called by the parser
13739 * The allowed sequence for the Well Balanced Chunk is the one defined by
13740 * the content production in the XML grammar:
13741 *
13742 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13743 *
13744 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13745 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013746 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013747 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013748 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13749 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013750 */
13751int
13752xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013753 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013754 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013755 xmlParserCtxtPtr ctxt;
13756 xmlDocPtr newDoc;
13757 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013758 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013759 int size;
13760 int ret = 0;
13761
Daniel Veillard0161e632008-08-28 15:36:32 +000013762 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013763 return(XML_ERR_ENTITY_LOOP);
13764 }
13765
13766
Daniel Veillardcda96922001-08-21 10:56:31 +000013767 if (lst != NULL)
13768 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013769 if (string == NULL)
13770 return(-1);
13771
13772 size = xmlStrlen(string);
13773
13774 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13775 if (ctxt == NULL) return(-1);
13776 ctxt->userData = ctxt;
13777 if (sax != NULL) {
13778 oldsax = ctxt->sax;
13779 ctxt->sax = sax;
13780 if (user_data != NULL)
13781 ctxt->userData = user_data;
13782 }
13783 newDoc = xmlNewDoc(BAD_CAST "1.0");
13784 if (newDoc == NULL) {
13785 xmlFreeParserCtxt(ctxt);
13786 return(-1);
13787 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013788 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013789 if ((doc != NULL) && (doc->dict != NULL)) {
13790 xmlDictFree(ctxt->dict);
13791 ctxt->dict = doc->dict;
13792 xmlDictReference(ctxt->dict);
13793 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13794 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13795 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13796 ctxt->dictNames = 1;
13797 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013798 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013799 }
Owen Taylor3473f882001-02-23 17:55:21 +000013800 if (doc != NULL) {
13801 newDoc->intSubset = doc->intSubset;
13802 newDoc->extSubset = doc->extSubset;
13803 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013804 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13805 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013806 if (sax != NULL)
13807 ctxt->sax = oldsax;
13808 xmlFreeParserCtxt(ctxt);
13809 newDoc->intSubset = NULL;
13810 newDoc->extSubset = NULL;
13811 xmlFreeDoc(newDoc);
13812 return(-1);
13813 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013814 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13815 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013816 if (doc == NULL) {
13817 ctxt->myDoc = newDoc;
13818 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013819 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013820 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013821 /* Ensure that doc has XML spec namespace */
13822 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13823 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013824 }
13825 ctxt->instate = XML_PARSER_CONTENT;
13826 ctxt->depth = depth;
13827
13828 /*
13829 * Doing validity checking on chunk doesn't make sense
13830 */
13831 ctxt->validate = 0;
13832 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013833 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013834
Daniel Veillardb39bc392002-10-26 19:29:51 +000013835 if ( doc != NULL ){
13836 content = doc->children;
13837 doc->children = NULL;
13838 xmlParseContent(ctxt);
13839 doc->children = content;
13840 }
13841 else {
13842 xmlParseContent(ctxt);
13843 }
Owen Taylor3473f882001-02-23 17:55:21 +000013844 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013845 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013846 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013847 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013848 }
13849 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013850 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013851 }
13852
13853 if (!ctxt->wellFormed) {
13854 if (ctxt->errNo == 0)
13855 ret = 1;
13856 else
13857 ret = ctxt->errNo;
13858 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013859 ret = 0;
13860 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013861
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013862 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13863 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013864
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013865 /*
13866 * Return the newly created nodeset after unlinking it from
13867 * they pseudo parent.
13868 */
13869 cur = newDoc->children->children;
13870 *lst = cur;
13871 while (cur != NULL) {
13872 xmlSetTreeDoc(cur, doc);
13873 cur->parent = NULL;
13874 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013875 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013876 newDoc->children->children = NULL;
13877 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013878
13879 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013880 ctxt->sax = oldsax;
13881 xmlFreeParserCtxt(ctxt);
13882 newDoc->intSubset = NULL;
13883 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013884 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013885 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013886
Owen Taylor3473f882001-02-23 17:55:21 +000013887 return(ret);
13888}
13889
13890/**
13891 * xmlSAXParseEntity:
13892 * @sax: the SAX handler block
13893 * @filename: the filename
13894 *
13895 * parse an XML external entity out of context and build a tree.
13896 * It use the given SAX function block to handle the parsing callback.
13897 * If sax is NULL, fallback to the default DOM tree building routines.
13898 *
13899 * [78] extParsedEnt ::= TextDecl? content
13900 *
13901 * This correspond to a "Well Balanced" chunk
13902 *
13903 * Returns the resulting document tree
13904 */
13905
13906xmlDocPtr
13907xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13908 xmlDocPtr ret;
13909 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013910
13911 ctxt = xmlCreateFileParserCtxt(filename);
13912 if (ctxt == NULL) {
13913 return(NULL);
13914 }
13915 if (sax != NULL) {
13916 if (ctxt->sax != NULL)
13917 xmlFree(ctxt->sax);
13918 ctxt->sax = sax;
13919 ctxt->userData = NULL;
13920 }
13921
Owen Taylor3473f882001-02-23 17:55:21 +000013922 xmlParseExtParsedEnt(ctxt);
13923
13924 if (ctxt->wellFormed)
13925 ret = ctxt->myDoc;
13926 else {
13927 ret = NULL;
13928 xmlFreeDoc(ctxt->myDoc);
13929 ctxt->myDoc = NULL;
13930 }
13931 if (sax != NULL)
13932 ctxt->sax = NULL;
13933 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013934
Owen Taylor3473f882001-02-23 17:55:21 +000013935 return(ret);
13936}
13937
13938/**
13939 * xmlParseEntity:
13940 * @filename: the filename
13941 *
13942 * parse an XML external entity out of context and build a tree.
13943 *
13944 * [78] extParsedEnt ::= TextDecl? content
13945 *
13946 * This correspond to a "Well Balanced" chunk
13947 *
13948 * Returns the resulting document tree
13949 */
13950
13951xmlDocPtr
13952xmlParseEntity(const char *filename) {
13953 return(xmlSAXParseEntity(NULL, filename));
13954}
Daniel Veillard81273902003-09-30 00:43:48 +000013955#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013956
13957/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013958 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013959 * @URL: the entity URL
13960 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013961 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013962 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013963 *
13964 * Create a parser context for an external entity
13965 * Automatic support for ZLIB/Compress compressed document is provided
13966 * by default if found at compile-time.
13967 *
13968 * Returns the new parser context or NULL
13969 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013970static xmlParserCtxtPtr
13971xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13972 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013973 xmlParserCtxtPtr ctxt;
13974 xmlParserInputPtr inputStream;
13975 char *directory = NULL;
13976 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013977
Owen Taylor3473f882001-02-23 17:55:21 +000013978 ctxt = xmlNewParserCtxt();
13979 if (ctxt == NULL) {
13980 return(NULL);
13981 }
13982
Daniel Veillard48247b42009-07-10 16:12:46 +020013983 if (pctx != NULL) {
13984 ctxt->options = pctx->options;
13985 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013986 }
13987
Owen Taylor3473f882001-02-23 17:55:21 +000013988 uri = xmlBuildURI(URL, base);
13989
13990 if (uri == NULL) {
13991 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13992 if (inputStream == NULL) {
13993 xmlFreeParserCtxt(ctxt);
13994 return(NULL);
13995 }
13996
13997 inputPush(ctxt, inputStream);
13998
13999 if ((ctxt->directory == NULL) && (directory == NULL))
14000 directory = xmlParserGetDirectory((char *)URL);
14001 if ((ctxt->directory == NULL) && (directory != NULL))
14002 ctxt->directory = directory;
14003 } else {
14004 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14005 if (inputStream == NULL) {
14006 xmlFree(uri);
14007 xmlFreeParserCtxt(ctxt);
14008 return(NULL);
14009 }
14010
14011 inputPush(ctxt, inputStream);
14012
14013 if ((ctxt->directory == NULL) && (directory == NULL))
14014 directory = xmlParserGetDirectory((char *)uri);
14015 if ((ctxt->directory == NULL) && (directory != NULL))
14016 ctxt->directory = directory;
14017 xmlFree(uri);
14018 }
Owen Taylor3473f882001-02-23 17:55:21 +000014019 return(ctxt);
14020}
14021
Rob Richards9c0aa472009-03-26 18:10:19 +000014022/**
14023 * xmlCreateEntityParserCtxt:
14024 * @URL: the entity URL
14025 * @ID: the entity PUBLIC ID
14026 * @base: a possible base for the target URI
14027 *
14028 * Create a parser context for an external entity
14029 * Automatic support for ZLIB/Compress compressed document is provided
14030 * by default if found at compile-time.
14031 *
14032 * Returns the new parser context or NULL
14033 */
14034xmlParserCtxtPtr
14035xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14036 const xmlChar *base) {
14037 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14038
14039}
14040
/************************************************************************
 *									*
 *		Front ends when parsing from a file			*
 *									*
 ************************************************************************/
14046
14047/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014048 * xmlCreateURLParserCtxt:
14049 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014050 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014051 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014052 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014053 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014054 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014055 *
14056 * Returns the new parser context or NULL
14057 */
14058xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014059xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014060{
14061 xmlParserCtxtPtr ctxt;
14062 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014063 char *directory = NULL;
14064
Owen Taylor3473f882001-02-23 17:55:21 +000014065 ctxt = xmlNewParserCtxt();
14066 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014067 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014068 return(NULL);
14069 }
14070
Daniel Veillarddf292f72005-01-16 19:00:15 +000014071 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014072 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014073 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014074
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014075 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014076 if (inputStream == NULL) {
14077 xmlFreeParserCtxt(ctxt);
14078 return(NULL);
14079 }
14080
Owen Taylor3473f882001-02-23 17:55:21 +000014081 inputPush(ctxt, inputStream);
14082 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014083 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014084 if ((ctxt->directory == NULL) && (directory != NULL))
14085 ctxt->directory = directory;
14086
14087 return(ctxt);
14088}
14089
Daniel Veillard61b93382003-11-03 14:28:31 +000014090/**
14091 * xmlCreateFileParserCtxt:
14092 * @filename: the filename
14093 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014094 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014095 * Automatic support for ZLIB/Compress compressed document is provided
14096 * by default if found at compile-time.
14097 *
14098 * Returns the new parser context or NULL
14099 */
14100xmlParserCtxtPtr
14101xmlCreateFileParserCtxt(const char *filename)
14102{
14103 return(xmlCreateURLParserCtxt(filename, 0));
14104}
14105
Daniel Veillard81273902003-09-30 00:43:48 +000014106#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014107/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014108 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014109 * @sax: the SAX handler block
14110 * @filename: the filename
14111 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14112 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014113 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014114 *
14115 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14116 * compressed document is provided by default if found at compile-time.
14117 * It use the given SAX function block to handle the parsing callback.
14118 * If sax is NULL, fallback to the default DOM tree building routines.
14119 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014120 * User data (void *) is stored within the parser context in the
14121 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014122 *
Owen Taylor3473f882001-02-23 17:55:21 +000014123 * Returns the resulting document tree
14124 */
14125
14126xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014127xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14128 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014129 xmlDocPtr ret;
14130 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014131
Daniel Veillard635ef722001-10-29 11:48:19 +000014132 xmlInitParser();
14133
Owen Taylor3473f882001-02-23 17:55:21 +000014134 ctxt = xmlCreateFileParserCtxt(filename);
14135 if (ctxt == NULL) {
14136 return(NULL);
14137 }
14138 if (sax != NULL) {
14139 if (ctxt->sax != NULL)
14140 xmlFree(ctxt->sax);
14141 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014142 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014143 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014144 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014145 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014146 }
Owen Taylor3473f882001-02-23 17:55:21 +000014147
Daniel Veillard37d2d162008-03-14 10:54:00 +000014148 if (ctxt->directory == NULL)
14149 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014150
Daniel Veillarddad3f682002-11-17 16:47:27 +000014151 ctxt->recovery = recovery;
14152
Owen Taylor3473f882001-02-23 17:55:21 +000014153 xmlParseDocument(ctxt);
14154
William M. Brackc07329e2003-09-08 01:57:30 +000014155 if ((ctxt->wellFormed) || recovery) {
14156 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014157 if (ret != NULL) {
14158 if (ctxt->input->buf->compressed > 0)
14159 ret->compression = 9;
14160 else
14161 ret->compression = ctxt->input->buf->compressed;
14162 }
William M. Brackc07329e2003-09-08 01:57:30 +000014163 }
Owen Taylor3473f882001-02-23 17:55:21 +000014164 else {
14165 ret = NULL;
14166 xmlFreeDoc(ctxt->myDoc);
14167 ctxt->myDoc = NULL;
14168 }
14169 if (sax != NULL)
14170 ctxt->sax = NULL;
14171 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014172
Owen Taylor3473f882001-02-23 17:55:21 +000014173 return(ret);
14174}
14175
14176/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014177 * xmlSAXParseFile:
14178 * @sax: the SAX handler block
14179 * @filename: the filename
14180 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14181 * documents
14182 *
14183 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14184 * compressed document is provided by default if found at compile-time.
14185 * It use the given SAX function block to handle the parsing callback.
14186 * If sax is NULL, fallback to the default DOM tree building routines.
14187 *
14188 * Returns the resulting document tree
14189 */
14190
14191xmlDocPtr
14192xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14193 int recovery) {
14194 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14195}
14196
14197/**
Owen Taylor3473f882001-02-23 17:55:21 +000014198 * xmlRecoverDoc:
14199 * @cur: a pointer to an array of xmlChar
14200 *
14201 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014202 * In the case the document is not Well Formed, a attempt to build a
14203 * tree is tried anyway
14204 *
14205 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014206 */
14207
14208xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014209xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014210 return(xmlSAXParseDoc(NULL, cur, 1));
14211}
14212
14213/**
14214 * xmlParseFile:
14215 * @filename: the filename
14216 *
14217 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14218 * compressed document is provided by default if found at compile-time.
14219 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014220 * Returns the resulting document tree if the file was wellformed,
14221 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014222 */
14223
14224xmlDocPtr
14225xmlParseFile(const char *filename) {
14226 return(xmlSAXParseFile(NULL, filename, 0));
14227}
14228
14229/**
14230 * xmlRecoverFile:
14231 * @filename: the filename
14232 *
14233 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14234 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014235 * In the case the document is not Well Formed, it attempts to build
14236 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014237 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014238 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014239 */
14240
14241xmlDocPtr
14242xmlRecoverFile(const char *filename) {
14243 return(xmlSAXParseFile(NULL, filename, 1));
14244}
14245
14246
14247/**
14248 * xmlSetupParserForBuffer:
14249 * @ctxt: an XML parser context
14250 * @buffer: a xmlChar * buffer
14251 * @filename: a file name
14252 *
14253 * Setup the parser context to parse a new buffer; Clears any prior
14254 * contents from the parser context. The buffer parameter must not be
14255 * NULL, but the filename parameter can be
14256 */
14257void
14258xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14259 const char* filename)
14260{
14261 xmlParserInputPtr input;
14262
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014263 if ((ctxt == NULL) || (buffer == NULL))
14264 return;
14265
Owen Taylor3473f882001-02-23 17:55:21 +000014266 input = xmlNewInputStream(ctxt);
14267 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014268 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014269 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014270 return;
14271 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014272
Owen Taylor3473f882001-02-23 17:55:21 +000014273 xmlClearParserCtxt(ctxt);
14274 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014275 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014276 input->base = buffer;
14277 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014278 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014279 inputPush(ctxt, input);
14280}
14281
14282/**
14283 * xmlSAXUserParseFile:
14284 * @sax: a SAX handler
14285 * @user_data: The user data returned on SAX callbacks
14286 * @filename: a file name
14287 *
14288 * parse an XML file and call the given SAX handler routines.
14289 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014290 *
Owen Taylor3473f882001-02-23 17:55:21 +000014291 * Returns 0 in case of success or a error number otherwise
14292 */
14293int
14294xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14295 const char *filename) {
14296 int ret = 0;
14297 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014298
Owen Taylor3473f882001-02-23 17:55:21 +000014299 ctxt = xmlCreateFileParserCtxt(filename);
14300 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014301 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014302 xmlFree(ctxt->sax);
14303 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014304 xmlDetectSAX2(ctxt);
14305
Owen Taylor3473f882001-02-23 17:55:21 +000014306 if (user_data != NULL)
14307 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014308
Owen Taylor3473f882001-02-23 17:55:21 +000014309 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014310
Owen Taylor3473f882001-02-23 17:55:21 +000014311 if (ctxt->wellFormed)
14312 ret = 0;
14313 else {
14314 if (ctxt->errNo != 0)
14315 ret = ctxt->errNo;
14316 else
14317 ret = -1;
14318 }
14319 if (sax != NULL)
14320 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014321 if (ctxt->myDoc != NULL) {
14322 xmlFreeDoc(ctxt->myDoc);
14323 ctxt->myDoc = NULL;
14324 }
Owen Taylor3473f882001-02-23 17:55:21 +000014325 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014326
Owen Taylor3473f882001-02-23 17:55:21 +000014327 return ret;
14328}
Daniel Veillard81273902003-09-30 00:43:48 +000014329#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014330
14331/************************************************************************
14332 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014333 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014334 * *
14335 ************************************************************************/
14336
14337/**
14338 * xmlCreateMemoryParserCtxt:
14339 * @buffer: a pointer to a char array
14340 * @size: the size of the array
14341 *
14342 * Create a parser context for an XML in-memory document.
14343 *
14344 * Returns the new parser context or NULL
14345 */
14346xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014347xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014348 xmlParserCtxtPtr ctxt;
14349 xmlParserInputPtr input;
14350 xmlParserInputBufferPtr buf;
14351
14352 if (buffer == NULL)
14353 return(NULL);
14354 if (size <= 0)
14355 return(NULL);
14356
14357 ctxt = xmlNewParserCtxt();
14358 if (ctxt == NULL)
14359 return(NULL);
14360
Daniel Veillard53350552003-09-18 13:35:51 +000014361 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014362 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014363 if (buf == NULL) {
14364 xmlFreeParserCtxt(ctxt);
14365 return(NULL);
14366 }
Owen Taylor3473f882001-02-23 17:55:21 +000014367
14368 input = xmlNewInputStream(ctxt);
14369 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014370 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014371 xmlFreeParserCtxt(ctxt);
14372 return(NULL);
14373 }
14374
14375 input->filename = NULL;
14376 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014377 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014378
14379 inputPush(ctxt, input);
14380 return(ctxt);
14381}
14382
Daniel Veillard81273902003-09-30 00:43:48 +000014383#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014384/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014385 * xmlSAXParseMemoryWithData:
14386 * @sax: the SAX handler block
14387 * @buffer: an pointer to a char array
14388 * @size: the size of the array
14389 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14390 * documents
14391 * @data: the userdata
14392 *
14393 * parse an XML in-memory block and use the given SAX function block
14394 * to handle the parsing callback. If sax is NULL, fallback to the default
14395 * DOM tree building routines.
14396 *
14397 * User data (void *) is stored within the parser context in the
14398 * context's _private member, so it is available nearly everywhere in libxml
14399 *
14400 * Returns the resulting document tree
14401 */
14402
14403xmlDocPtr
14404xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14405 int size, int recovery, void *data) {
14406 xmlDocPtr ret;
14407 xmlParserCtxtPtr ctxt;
14408
Daniel Veillardab2a7632009-07-09 08:45:03 +020014409 xmlInitParser();
14410
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014411 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14412 if (ctxt == NULL) return(NULL);
14413 if (sax != NULL) {
14414 if (ctxt->sax != NULL)
14415 xmlFree(ctxt->sax);
14416 ctxt->sax = sax;
14417 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014418 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014419 if (data!=NULL) {
14420 ctxt->_private=data;
14421 }
14422
Daniel Veillardadba5f12003-04-04 16:09:01 +000014423 ctxt->recovery = recovery;
14424
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014425 xmlParseDocument(ctxt);
14426
14427 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14428 else {
14429 ret = NULL;
14430 xmlFreeDoc(ctxt->myDoc);
14431 ctxt->myDoc = NULL;
14432 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014433 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014434 ctxt->sax = NULL;
14435 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014436
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014437 return(ret);
14438}
14439
14440/**
Owen Taylor3473f882001-02-23 17:55:21 +000014441 * xmlSAXParseMemory:
14442 * @sax: the SAX handler block
14443 * @buffer: an pointer to a char array
14444 * @size: the size of the array
14445 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14446 * documents
14447 *
14448 * parse an XML in-memory block and use the given SAX function block
14449 * to handle the parsing callback. If sax is NULL, fallback to the default
14450 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014451 *
Owen Taylor3473f882001-02-23 17:55:21 +000014452 * Returns the resulting document tree
14453 */
14454xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014455xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14456 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014457 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014458}
14459
14460/**
14461 * xmlParseMemory:
14462 * @buffer: an pointer to a char array
14463 * @size: the size of the array
14464 *
14465 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014466 *
Owen Taylor3473f882001-02-23 17:55:21 +000014467 * Returns the resulting document tree
14468 */
14469
Daniel Veillard50822cb2001-07-26 20:05:51 +000014470xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014471 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14472}
14473
14474/**
14475 * xmlRecoverMemory:
14476 * @buffer: an pointer to a char array
14477 * @size: the size of the array
14478 *
14479 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014480 * In the case the document is not Well Formed, an attempt to
14481 * build a tree is tried anyway
14482 *
14483 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014484 */
14485
Daniel Veillard50822cb2001-07-26 20:05:51 +000014486xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014487 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14488}
14489
14490/**
14491 * xmlSAXUserParseMemory:
14492 * @sax: a SAX handler
14493 * @user_data: The user data returned on SAX callbacks
14494 * @buffer: an in-memory XML document input
14495 * @size: the length of the XML document in bytes
14496 *
14497 * A better SAX parsing routine.
14498 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014499 *
Owen Taylor3473f882001-02-23 17:55:21 +000014500 * Returns 0 in case of success or a error number otherwise
14501 */
14502int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014503 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014504 int ret = 0;
14505 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014506
14507 xmlInitParser();
14508
Owen Taylor3473f882001-02-23 17:55:21 +000014509 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14510 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014511 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14512 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014513 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014514 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014515
Daniel Veillard30211a02001-04-26 09:33:18 +000014516 if (user_data != NULL)
14517 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014518
Owen Taylor3473f882001-02-23 17:55:21 +000014519 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014520
Owen Taylor3473f882001-02-23 17:55:21 +000014521 if (ctxt->wellFormed)
14522 ret = 0;
14523 else {
14524 if (ctxt->errNo != 0)
14525 ret = ctxt->errNo;
14526 else
14527 ret = -1;
14528 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014529 if (sax != NULL)
14530 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014531 if (ctxt->myDoc != NULL) {
14532 xmlFreeDoc(ctxt->myDoc);
14533 ctxt->myDoc = NULL;
14534 }
Owen Taylor3473f882001-02-23 17:55:21 +000014535 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014536
Owen Taylor3473f882001-02-23 17:55:21 +000014537 return ret;
14538}
Daniel Veillard81273902003-09-30 00:43:48 +000014539#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014540
14541/**
14542 * xmlCreateDocParserCtxt:
14543 * @cur: a pointer to an array of xmlChar
14544 *
14545 * Creates a parser context for an XML in-memory document.
14546 *
14547 * Returns the new parser context or NULL
14548 */
14549xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014550xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014551 int len;
14552
14553 if (cur == NULL)
14554 return(NULL);
14555 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014556 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014557}
14558
Daniel Veillard81273902003-09-30 00:43:48 +000014559#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014560/**
14561 * xmlSAXParseDoc:
14562 * @sax: the SAX handler block
14563 * @cur: a pointer to an array of xmlChar
14564 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14565 * documents
14566 *
14567 * parse an XML in-memory document and build a tree.
14568 * It use the given SAX function block to handle the parsing callback.
14569 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014570 *
Owen Taylor3473f882001-02-23 17:55:21 +000014571 * Returns the resulting document tree
14572 */
14573
14574xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014575xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014576 xmlDocPtr ret;
14577 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014578 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014579
Daniel Veillard38936062004-11-04 17:45:11 +000014580 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014581
14582
14583 ctxt = xmlCreateDocParserCtxt(cur);
14584 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014585 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014586 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014587 ctxt->sax = sax;
14588 ctxt->userData = NULL;
14589 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014590 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014591
14592 xmlParseDocument(ctxt);
14593 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14594 else {
14595 ret = NULL;
14596 xmlFreeDoc(ctxt->myDoc);
14597 ctxt->myDoc = NULL;
14598 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014599 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014600 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014601 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014602
Owen Taylor3473f882001-02-23 17:55:21 +000014603 return(ret);
14604}
14605
14606/**
14607 * xmlParseDoc:
14608 * @cur: a pointer to an array of xmlChar
14609 *
14610 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014611 *
Owen Taylor3473f882001-02-23 17:55:21 +000014612 * Returns the resulting document tree
14613 */
14614
14615xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014616xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014617 return(xmlSAXParseDoc(NULL, cur, 0));
14618}
Daniel Veillard81273902003-09-30 00:43:48 +000014619#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014620
Daniel Veillard81273902003-09-30 00:43:48 +000014621#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014622/************************************************************************
14623 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014624 * Specific function to keep track of entities references *
14625 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014626 * *
14627 ************************************************************************/
14628
14629static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14630
14631/**
14632 * xmlAddEntityReference:
14633 * @ent : A valid entity
14634 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014635 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014636 *
14637 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14638 */
14639static void
14640xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14641 xmlNodePtr lastNode)
14642{
14643 if (xmlEntityRefFunc != NULL) {
14644 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14645 }
14646}
14647
14648
14649/**
14650 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014651 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014652 *
14653 * Set the function to call call back when a xml reference has been made
14654 */
14655void
14656xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14657{
14658 xmlEntityRefFunc = func;
14659}
Daniel Veillard81273902003-09-30 00:43:48 +000014660#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014661
14662/************************************************************************
14663 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014664 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014665 * *
14666 ************************************************************************/
14667
14668#ifdef LIBXML_XPATH_ENABLED
14669#include <libxml/xpath.h>
14670#endif
14671
Daniel Veillardffa3c742005-07-21 13:24:09 +000014672extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014673static int xmlParserInitialized = 0;
14674
14675/**
14676 * xmlInitParser:
14677 *
14678 * Initialization function for the XML parser.
14679 * This is not reentrant. Call once before processing in case of
14680 * use in multithreaded programs.
14681 */
14682
14683void
14684xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014685 if (xmlParserInitialized != 0)
14686 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014687
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014688#ifdef LIBXML_THREAD_ENABLED
14689 __xmlGlobalInitMutexLock();
14690 if (xmlParserInitialized == 0) {
14691#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014692 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014693 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014694 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14695 (xmlGenericError == NULL))
14696 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014697 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014698 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014699 xmlInitCharEncodingHandlers();
14700 xmlDefaultSAXHandlerInit();
14701 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014702#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014703 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014704#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014705#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014706 htmlInitAutoClose();
14707 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014708#endif
14709#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014710 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014711#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014712 xmlParserInitialized = 1;
14713#ifdef LIBXML_THREAD_ENABLED
14714 }
14715 __xmlGlobalInitMutexUnlock();
14716#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014717}
14718
14719/**
14720 * xmlCleanupParser:
14721 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014722 * This function name is somewhat misleading. It does not clean up
14723 * parser state, it cleans up memory allocated by the library itself.
14724 * It is a cleanup function for the XML library. It tries to reclaim all
14725 * related global memory allocated for the library processing.
14726 * It doesn't deallocate any document related memory. One should
14727 * call xmlCleanupParser() only when the process has finished using
14728 * the library and all XML/HTML documents built with it.
14729 * See also xmlInitParser() which has the opposite function of preparing
14730 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014731 *
14732 * WARNING: if your application is multithreaded or has plugin support
14733 * calling this may crash the application if another thread or
14734 * a plugin is still using libxml2. It's sometimes very hard to
14735 * guess if libxml2 is in use in the application, some libraries
14736 * or plugins may use it without notice. In case of doubt abstain
14737 * from calling this function or do it just before calling exit()
14738 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014739 */
14740
14741void
14742xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014743 if (!xmlParserInitialized)
14744 return;
14745
Owen Taylor3473f882001-02-23 17:55:21 +000014746 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014747#ifdef LIBXML_CATALOG_ENABLED
14748 xmlCatalogCleanup();
14749#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014750 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014751 xmlCleanupInputCallbacks();
14752#ifdef LIBXML_OUTPUT_ENABLED
14753 xmlCleanupOutputCallbacks();
14754#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014755#ifdef LIBXML_SCHEMAS_ENABLED
14756 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014757 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014758#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014759 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014760 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014761 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014762 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014763 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014764}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014765
14766/************************************************************************
14767 * *
14768 * New set (2.6.0) of simpler and more flexible APIs *
14769 * *
14770 ************************************************************************/
14771
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used; it may be NULL, in which case the string is freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement, including inside an unbraced if/else; the
 * caller provides the terminating semicolon.
 */
#define DICT_FREE(str)						\
	do {							\
	    if ((str) && ((!dict) ||				\
	        (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	        xmlFree((char *)(str));				\
	} while (0)
14783
14784/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014785 * xmlCtxtReset:
14786 * @ctxt: an XML parser context
14787 *
14788 * Reset a parser context
14789 */
14790void
14791xmlCtxtReset(xmlParserCtxtPtr ctxt)
14792{
14793 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014794 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014795
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014796 if (ctxt == NULL)
14797 return;
14798
14799 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014800
14801 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14802 xmlFreeInputStream(input);
14803 }
14804 ctxt->inputNr = 0;
14805 ctxt->input = NULL;
14806
14807 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014808 if (ctxt->spaceTab != NULL) {
14809 ctxt->spaceTab[0] = -1;
14810 ctxt->space = &ctxt->spaceTab[0];
14811 } else {
14812 ctxt->space = NULL;
14813 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014814
14815
14816 ctxt->nodeNr = 0;
14817 ctxt->node = NULL;
14818
14819 ctxt->nameNr = 0;
14820 ctxt->name = NULL;
14821
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014822 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014823 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014824 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014825 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014826 DICT_FREE(ctxt->directory);
14827 ctxt->directory = NULL;
14828 DICT_FREE(ctxt->extSubURI);
14829 ctxt->extSubURI = NULL;
14830 DICT_FREE(ctxt->extSubSystem);
14831 ctxt->extSubSystem = NULL;
14832 if (ctxt->myDoc != NULL)
14833 xmlFreeDoc(ctxt->myDoc);
14834 ctxt->myDoc = NULL;
14835
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014836 ctxt->standalone = -1;
14837 ctxt->hasExternalSubset = 0;
14838 ctxt->hasPErefs = 0;
14839 ctxt->html = 0;
14840 ctxt->external = 0;
14841 ctxt->instate = XML_PARSER_START;
14842 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014843
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014844 ctxt->wellFormed = 1;
14845 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014846 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014847 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014848#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014849 ctxt->vctxt.userData = ctxt;
14850 ctxt->vctxt.error = xmlParserValidityError;
14851 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014852#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014853 ctxt->record_info = 0;
14854 ctxt->nbChars = 0;
14855 ctxt->checkIndex = 0;
14856 ctxt->inSubset = 0;
14857 ctxt->errNo = XML_ERR_OK;
14858 ctxt->depth = 0;
14859 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14860 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014861 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014862 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014863 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014864 xmlInitNodeInfoSeq(&ctxt->node_seq);
14865
14866 if (ctxt->attsDefault != NULL) {
14867 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14868 ctxt->attsDefault = NULL;
14869 }
14870 if (ctxt->attsSpecial != NULL) {
14871 xmlHashFree(ctxt->attsSpecial, NULL);
14872 ctxt->attsSpecial = NULL;
14873 }
14874
Daniel Veillard4432df22003-09-28 18:58:27 +000014875#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014876 if (ctxt->catalogs != NULL)
14877 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014878#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014879 if (ctxt->lastError.code != XML_ERR_OK)
14880 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014881}
14882
14883/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014884 * xmlCtxtResetPush:
14885 * @ctxt: an XML parser context
14886 * @chunk: a pointer to an array of chars
14887 * @size: number of chars in the array
14888 * @filename: an optional file name or URI
14889 * @encoding: the document encoding, or NULL
14890 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014891 * Reset a push parser context
14892 *
14893 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014894 */
14895int
14896xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14897 int size, const char *filename, const char *encoding)
14898{
14899 xmlParserInputPtr inputStream;
14900 xmlParserInputBufferPtr buf;
14901 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14902
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014903 if (ctxt == NULL)
14904 return(1);
14905
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014906 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14907 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14908
14909 buf = xmlAllocParserInputBuffer(enc);
14910 if (buf == NULL)
14911 return(1);
14912
14913 if (ctxt == NULL) {
14914 xmlFreeParserInputBuffer(buf);
14915 return(1);
14916 }
14917
14918 xmlCtxtReset(ctxt);
14919
14920 if (ctxt->pushTab == NULL) {
14921 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14922 sizeof(xmlChar *));
14923 if (ctxt->pushTab == NULL) {
14924 xmlErrMemory(ctxt, NULL);
14925 xmlFreeParserInputBuffer(buf);
14926 return(1);
14927 }
14928 }
14929
14930 if (filename == NULL) {
14931 ctxt->directory = NULL;
14932 } else {
14933 ctxt->directory = xmlParserGetDirectory(filename);
14934 }
14935
14936 inputStream = xmlNewInputStream(ctxt);
14937 if (inputStream == NULL) {
14938 xmlFreeParserInputBuffer(buf);
14939 return(1);
14940 }
14941
14942 if (filename == NULL)
14943 inputStream->filename = NULL;
14944 else
14945 inputStream->filename = (char *)
14946 xmlCanonicPath((const xmlChar *) filename);
14947 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014948 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014949
14950 inputPush(ctxt, inputStream);
14951
14952 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14953 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014954 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14955 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014956
14957 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14958
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014959 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014960#ifdef DEBUG_PUSH
14961 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14962#endif
14963 }
14964
14965 if (encoding != NULL) {
14966 xmlCharEncodingHandlerPtr hdlr;
14967
Daniel Veillard37334572008-07-31 08:20:02 +000014968 if (ctxt->encoding != NULL)
14969 xmlFree((xmlChar *) ctxt->encoding);
14970 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14971
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014972 hdlr = xmlFindCharEncodingHandler(encoding);
14973 if (hdlr != NULL) {
14974 xmlSwitchToEncoding(ctxt, hdlr);
14975 } else {
14976 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14977 "Unsupported encoding %s\n", BAD_CAST encoding);
14978 }
14979 } else if (enc != XML_CHAR_ENCODING_NONE) {
14980 xmlSwitchEncoding(ctxt, enc);
14981 }
14982
14983 return(0);
14984}
14985
Daniel Veillard37334572008-07-31 08:20:02 +000014986
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014987/**
Daniel Veillard37334572008-07-31 08:20:02 +000014988 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014989 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014990 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014991 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014992 *
14993 * Applies the options to the parser context
14994 *
14995 * Returns 0 in case of success, the set of unknown or unimplemented options
14996 * in case of error.
14997 */
Daniel Veillard37334572008-07-31 08:20:02 +000014998static int
14999xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015000{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015001 if (ctxt == NULL)
15002 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015003 if (encoding != NULL) {
15004 if (ctxt->encoding != NULL)
15005 xmlFree((xmlChar *) ctxt->encoding);
15006 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15007 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015008 if (options & XML_PARSE_RECOVER) {
15009 ctxt->recovery = 1;
15010 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015011 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015012 } else
15013 ctxt->recovery = 0;
15014 if (options & XML_PARSE_DTDLOAD) {
15015 ctxt->loadsubset = XML_DETECT_IDS;
15016 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015017 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015018 } else
15019 ctxt->loadsubset = 0;
15020 if (options & XML_PARSE_DTDATTR) {
15021 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15022 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015023 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015024 }
15025 if (options & XML_PARSE_NOENT) {
15026 ctxt->replaceEntities = 1;
15027 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15028 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015029 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015030 } else
15031 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015032 if (options & XML_PARSE_PEDANTIC) {
15033 ctxt->pedantic = 1;
15034 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015035 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015036 } else
15037 ctxt->pedantic = 0;
15038 if (options & XML_PARSE_NOBLANKS) {
15039 ctxt->keepBlanks = 0;
15040 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15041 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015042 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015043 } else
15044 ctxt->keepBlanks = 1;
15045 if (options & XML_PARSE_DTDVALID) {
15046 ctxt->validate = 1;
15047 if (options & XML_PARSE_NOWARNING)
15048 ctxt->vctxt.warning = NULL;
15049 if (options & XML_PARSE_NOERROR)
15050 ctxt->vctxt.error = NULL;
15051 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015052 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015053 } else
15054 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015055 if (options & XML_PARSE_NOWARNING) {
15056 ctxt->sax->warning = NULL;
15057 options -= XML_PARSE_NOWARNING;
15058 }
15059 if (options & XML_PARSE_NOERROR) {
15060 ctxt->sax->error = NULL;
15061 ctxt->sax->fatalError = NULL;
15062 options -= XML_PARSE_NOERROR;
15063 }
Daniel Veillard81273902003-09-30 00:43:48 +000015064#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015065 if (options & XML_PARSE_SAX1) {
15066 ctxt->sax->startElement = xmlSAX2StartElement;
15067 ctxt->sax->endElement = xmlSAX2EndElement;
15068 ctxt->sax->startElementNs = NULL;
15069 ctxt->sax->endElementNs = NULL;
15070 ctxt->sax->initialized = 1;
15071 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015072 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015073 }
Daniel Veillard81273902003-09-30 00:43:48 +000015074#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015075 if (options & XML_PARSE_NODICT) {
15076 ctxt->dictNames = 0;
15077 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015078 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015079 } else {
15080 ctxt->dictNames = 1;
15081 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015082 if (options & XML_PARSE_NOCDATA) {
15083 ctxt->sax->cdataBlock = NULL;
15084 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015085 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015086 }
15087 if (options & XML_PARSE_NSCLEAN) {
15088 ctxt->options |= XML_PARSE_NSCLEAN;
15089 options -= XML_PARSE_NSCLEAN;
15090 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015091 if (options & XML_PARSE_NONET) {
15092 ctxt->options |= XML_PARSE_NONET;
15093 options -= XML_PARSE_NONET;
15094 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015095 if (options & XML_PARSE_COMPACT) {
15096 ctxt->options |= XML_PARSE_COMPACT;
15097 options -= XML_PARSE_COMPACT;
15098 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015099 if (options & XML_PARSE_OLD10) {
15100 ctxt->options |= XML_PARSE_OLD10;
15101 options -= XML_PARSE_OLD10;
15102 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015103 if (options & XML_PARSE_NOBASEFIX) {
15104 ctxt->options |= XML_PARSE_NOBASEFIX;
15105 options -= XML_PARSE_NOBASEFIX;
15106 }
15107 if (options & XML_PARSE_HUGE) {
15108 ctxt->options |= XML_PARSE_HUGE;
15109 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015110 if (ctxt->dict != NULL)
15111 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015112 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015113 if (options & XML_PARSE_OLDSAX) {
15114 ctxt->options |= XML_PARSE_OLDSAX;
15115 options -= XML_PARSE_OLDSAX;
15116 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015117 if (options & XML_PARSE_IGNORE_ENC) {
15118 ctxt->options |= XML_PARSE_IGNORE_ENC;
15119 options -= XML_PARSE_IGNORE_ENC;
15120 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015121 if (options & XML_PARSE_BIG_LINES) {
15122 ctxt->options |= XML_PARSE_BIG_LINES;
15123 options -= XML_PARSE_BIG_LINES;
15124 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015125 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015126 return (options);
15127}
15128
15129/**
Daniel Veillard37334572008-07-31 08:20:02 +000015130 * xmlCtxtUseOptions:
15131 * @ctxt: an XML parser context
15132 * @options: a combination of xmlParserOption
15133 *
15134 * Applies the options to the parser context
15135 *
15136 * Returns 0 in case of success, the set of unknown or unimplemented options
15137 * in case of error.
15138 */
15139int
15140xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15141{
15142 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15143}
15144
15145/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015146 * xmlDoRead:
15147 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015148 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015149 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015150 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015151 * @reuse: keep the context for reuse
15152 *
15153 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015154 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015155 * Returns the resulting document tree or NULL
15156 */
15157static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015158xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15159 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015160{
15161 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015162
15163 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015164 if (encoding != NULL) {
15165 xmlCharEncodingHandlerPtr hdlr;
15166
15167 hdlr = xmlFindCharEncodingHandler(encoding);
15168 if (hdlr != NULL)
15169 xmlSwitchToEncoding(ctxt, hdlr);
15170 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015171 if ((URL != NULL) && (ctxt->input != NULL) &&
15172 (ctxt->input->filename == NULL))
15173 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015174 xmlParseDocument(ctxt);
15175 if ((ctxt->wellFormed) || ctxt->recovery)
15176 ret = ctxt->myDoc;
15177 else {
15178 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015179 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015180 xmlFreeDoc(ctxt->myDoc);
15181 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015182 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015183 ctxt->myDoc = NULL;
15184 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015185 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015186 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015187
15188 return (ret);
15189}
15190
15191/**
15192 * xmlReadDoc:
15193 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015194 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015195 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015196 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015197 *
15198 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015199 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015200 * Returns the resulting document tree
15201 */
15202xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015203xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015204{
15205 xmlParserCtxtPtr ctxt;
15206
15207 if (cur == NULL)
15208 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015209 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015210
15211 ctxt = xmlCreateDocParserCtxt(cur);
15212 if (ctxt == NULL)
15213 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015214 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015215}
15216
15217/**
15218 * xmlReadFile:
15219 * @filename: a file or URL
15220 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015221 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015222 *
15223 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015224 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015225 * Returns the resulting document tree
15226 */
15227xmlDocPtr
15228xmlReadFile(const char *filename, const char *encoding, int options)
15229{
15230 xmlParserCtxtPtr ctxt;
15231
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015232 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015233 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015234 if (ctxt == NULL)
15235 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015236 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015237}
15238
15239/**
15240 * xmlReadMemory:
15241 * @buffer: a pointer to a char array
15242 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015243 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015244 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015245 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015246 *
15247 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015248 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015249 * Returns the resulting document tree
15250 */
15251xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015252xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015253{
15254 xmlParserCtxtPtr ctxt;
15255
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015256 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015257 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15258 if (ctxt == NULL)
15259 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015260 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015261}
15262
15263/**
15264 * xmlReadFd:
15265 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015266 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015267 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015268 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015269 *
15270 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015271 * NOTE that the file descriptor will not be closed when the
15272 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015273 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015274 * Returns the resulting document tree
15275 */
15276xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015277xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015278{
15279 xmlParserCtxtPtr ctxt;
15280 xmlParserInputBufferPtr input;
15281 xmlParserInputPtr stream;
15282
15283 if (fd < 0)
15284 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015285 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015286
15287 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15288 if (input == NULL)
15289 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015290 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015291 ctxt = xmlNewParserCtxt();
15292 if (ctxt == NULL) {
15293 xmlFreeParserInputBuffer(input);
15294 return (NULL);
15295 }
15296 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15297 if (stream == NULL) {
15298 xmlFreeParserInputBuffer(input);
15299 xmlFreeParserCtxt(ctxt);
15300 return (NULL);
15301 }
15302 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015303 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015304}
15305
15306/**
15307 * xmlReadIO:
15308 * @ioread: an I/O read function
15309 * @ioclose: an I/O close function
15310 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015311 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015312 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015313 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015314 *
15315 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015316 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015317 * Returns the resulting document tree
15318 */
15319xmlDocPtr
15320xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015321 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015322{
15323 xmlParserCtxtPtr ctxt;
15324 xmlParserInputBufferPtr input;
15325 xmlParserInputPtr stream;
15326
15327 if (ioread == NULL)
15328 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015329 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015330
15331 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15332 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015333 if (input == NULL) {
15334 if (ioclose != NULL)
15335 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015336 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015337 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015338 ctxt = xmlNewParserCtxt();
15339 if (ctxt == NULL) {
15340 xmlFreeParserInputBuffer(input);
15341 return (NULL);
15342 }
15343 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15344 if (stream == NULL) {
15345 xmlFreeParserInputBuffer(input);
15346 xmlFreeParserCtxt(ctxt);
15347 return (NULL);
15348 }
15349 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015350 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015351}
15352
15353/**
15354 * xmlCtxtReadDoc:
15355 * @ctxt: an XML parser context
15356 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015357 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015358 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015359 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015360 *
15361 * parse an XML in-memory document and build a tree.
15362 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015363 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015364 * Returns the resulting document tree
15365 */
15366xmlDocPtr
15367xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015368 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015369{
15370 xmlParserInputPtr stream;
15371
15372 if (cur == NULL)
15373 return (NULL);
15374 if (ctxt == NULL)
15375 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015376 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015377
15378 xmlCtxtReset(ctxt);
15379
15380 stream = xmlNewStringInputStream(ctxt, cur);
15381 if (stream == NULL) {
15382 return (NULL);
15383 }
15384 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015385 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015386}
15387
15388/**
15389 * xmlCtxtReadFile:
15390 * @ctxt: an XML parser context
15391 * @filename: a file or URL
15392 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015393 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015394 *
15395 * parse an XML file from the filesystem or the network.
15396 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015397 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015398 * Returns the resulting document tree
15399 */
15400xmlDocPtr
15401xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15402 const char *encoding, int options)
15403{
15404 xmlParserInputPtr stream;
15405
15406 if (filename == NULL)
15407 return (NULL);
15408 if (ctxt == NULL)
15409 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015410 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015411
15412 xmlCtxtReset(ctxt);
15413
Daniel Veillard29614c72004-11-26 10:47:26 +000015414 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015415 if (stream == NULL) {
15416 return (NULL);
15417 }
15418 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015419 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015420}
15421
15422/**
15423 * xmlCtxtReadMemory:
15424 * @ctxt: an XML parser context
15425 * @buffer: a pointer to a char array
15426 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015427 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015428 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015429 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015430 *
15431 * parse an XML in-memory document and build a tree.
15432 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015433 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015434 * Returns the resulting document tree
15435 */
15436xmlDocPtr
15437xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015438 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015439{
15440 xmlParserInputBufferPtr input;
15441 xmlParserInputPtr stream;
15442
15443 if (ctxt == NULL)
15444 return (NULL);
15445 if (buffer == NULL)
15446 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015447 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015448
15449 xmlCtxtReset(ctxt);
15450
15451 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15452 if (input == NULL) {
15453 return(NULL);
15454 }
15455
15456 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15457 if (stream == NULL) {
15458 xmlFreeParserInputBuffer(input);
15459 return(NULL);
15460 }
15461
15462 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015463 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015464}
15465
15466/**
15467 * xmlCtxtReadFd:
15468 * @ctxt: an XML parser context
15469 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015470 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015471 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015472 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015473 *
15474 * parse an XML from a file descriptor and build a tree.
15475 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015476 * NOTE that the file descriptor will not be closed when the
15477 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015478 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015479 * Returns the resulting document tree
15480 */
15481xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015482xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15483 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015484{
15485 xmlParserInputBufferPtr input;
15486 xmlParserInputPtr stream;
15487
15488 if (fd < 0)
15489 return (NULL);
15490 if (ctxt == NULL)
15491 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015492 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015493
15494 xmlCtxtReset(ctxt);
15495
15496
15497 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15498 if (input == NULL)
15499 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015500 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015501 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15502 if (stream == NULL) {
15503 xmlFreeParserInputBuffer(input);
15504 return (NULL);
15505 }
15506 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015507 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015508}
15509
15510/**
15511 * xmlCtxtReadIO:
15512 * @ctxt: an XML parser context
15513 * @ioread: an I/O read function
15514 * @ioclose: an I/O close function
15515 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015516 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015517 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015518 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015519 *
15520 * parse an XML document from I/O functions and source and build a tree.
15521 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015522 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015523 * Returns the resulting document tree
15524 */
15525xmlDocPtr
15526xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15527 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015528 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015529 const char *encoding, int options)
15530{
15531 xmlParserInputBufferPtr input;
15532 xmlParserInputPtr stream;
15533
15534 if (ioread == NULL)
15535 return (NULL);
15536 if (ctxt == NULL)
15537 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015538 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015539
15540 xmlCtxtReset(ctxt);
15541
15542 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15543 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015544 if (input == NULL) {
15545 if (ioclose != NULL)
15546 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015547 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015548 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015549 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15550 if (stream == NULL) {
15551 xmlFreeParserInputBuffer(input);
15552 return (NULL);
15553 }
15554 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015555 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015556}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015557
15558#define bottom_parser
15559#include "elfgcchack.h"