blob: f11f017af10644b0e7522dafb3daa79ff3da7dbf [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Stéphane Michaut454e3972017-08-28 14:30:43 +020033/* To avoid EBCDIC trouble when parsing on zOS */
34#if defined(__MVS__)
35#pragma convert("ISO8859-1")
36#endif
37
Daniel Veillard34ce8be2002-03-18 19:37:11 +000038#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000039#include "libxml.h"
40
/*
 * XML_DIR_SEP: directory separator character. A backslash is used when
 * building for native Windows (_WIN32 without Cygwin), a forward slash
 * everywhere else.
 */
#if defined(_WIN32) && !defined(__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
46
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080048#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000050#include <stdarg.h>
Nick Wellnhofer855c19e2017-06-01 01:04:08 +020051#include <stddef.h>
Owen Taylor3473f882001-02-23 17:55:21 +000052#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000053#include <libxml/threads.h>
54#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055#include <libxml/tree.h>
56#include <libxml/parser.h>
57#include <libxml/parserInternals.h>
58#include <libxml/valid.h>
59#include <libxml/entities.h>
60#include <libxml/xmlerror.h>
61#include <libxml/encoding.h>
62#include <libxml/xmlIO.h>
63#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000064#ifdef LIBXML_CATALOG_ENABLED
65#include <libxml/catalog.h>
66#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000067#ifdef LIBXML_SCHEMAS_ENABLED
68#include <libxml/xmlschemastypes.h>
69#include <libxml/relaxng.h>
70#endif
Owen Taylor3473f882001-02-23 17:55:21 +000071#ifdef HAVE_CTYPE_H
72#include <ctype.h>
73#endif
74#ifdef HAVE_STDLIB_H
75#include <stdlib.h>
76#endif
77#ifdef HAVE_SYS_STAT_H
78#include <sys/stat.h>
79#endif
80#ifdef HAVE_FCNTL_H
81#include <fcntl.h>
82#endif
83#ifdef HAVE_UNISTD_H
84#include <unistd.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Elliott Hughese54f00d2021-05-13 08:13:46 -070090struct _xmlStartTag {
91 const xmlChar *prefix;
92 const xmlChar *URI;
93 int line;
94 int nsNr;
95};
96
Daniel Veillard0161e632008-08-28 15:36:32 +000097static void
98xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
99
Rob Richards9c0aa472009-03-26 18:10:19 +0000100static xmlParserCtxtPtr
101xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
102 const xmlChar *base, xmlParserCtxtPtr pctx);
103
Daniel Veillard28cd9cb2015-11-20 14:55:30 +0800104static void xmlHaltParser(xmlParserCtxtPtr ctxt);
105
Haibo Huangcfd91dc2020-07-30 23:01:33 -0700106static int
107xmlParseElementStart(xmlParserCtxtPtr ctxt);
108
109static void
110xmlParseElementEnd(xmlParserCtxtPtr ctxt);
111
/************************************************************************
 *                                                                      *
 *      Arbitrary limits set in the parser. See XML_PARSE_HUGE          *
 *                                                                      *
 ************************************************************************/

#define XML_MAX_HUGE_LENGTH 1000000000

#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
130
131/*
132 * xmlParserEntityCheck
133 *
134 * Function to check non-linear entity expansion behaviour
135 * This is here to detect and stop exponential linear entity expansion
136 * This is not a limitation of the parser but a safety
137 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138 * parser option.
139 */
140static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800141xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800142 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000143{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800144 size_t consumed = 0;
Elliott Hughese54f00d2021-05-13 08:13:46 -0700145 int i;
Daniel Veillard0161e632008-08-28 15:36:32 +0000146
147 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
148 return (0);
149 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
150 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800151
152 /*
153 * This may look absurd but is needed to detect
154 * entities problems
155 */
156 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillardbdd66182016-05-23 12:27:58 +0800157 (ent->content != NULL) && (ent->checked == 0) &&
158 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -0700159 unsigned long oldnbent = ctxt->nbentities, diff;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800160 xmlChar *rep;
161
162 ent->checked = 1;
163
Peter Simons8f30bdf2016-04-15 11:56:55 +0200164 ++ctxt->depth;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800165 rep = xmlStringDecodeEntities(ctxt, ent->content,
166 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +0200167 --ctxt->depth;
Nick Wellnhofer707ad082018-01-23 16:37:54 +0100168 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbdd66182016-05-23 12:27:58 +0800169 ent->content[0] = 0;
170 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800171
Haibo Huangcfd91dc2020-07-30 23:01:33 -0700172 diff = ctxt->nbentities - oldnbent + 1;
173 if (diff > INT_MAX / 2)
174 diff = INT_MAX / 2;
175 ent->checked = diff * 2;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800176 if (rep != NULL) {
177 if (xmlStrchr(rep, '<'))
178 ent->checked |= 1;
179 xmlFree(rep);
180 rep = NULL;
181 }
182 }
Elliott Hughese54f00d2021-05-13 08:13:46 -0700183
184 /*
185 * Prevent entity exponential check, not just replacement while
186 * parsing the DTD
187 * The check is potentially costly so do that only once in a thousand
188 */
189 if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
190 (ctxt->nbentities % 1024 == 0)) {
191 for (i = 0;i < ctxt->inputNr;i++) {
192 consumed += ctxt->inputTab[i]->consumed +
193 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
194 }
195 if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
196 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
197 ctxt->instate = XML_PARSER_EOF;
198 return (1);
199 }
200 consumed = 0;
201 }
202
203
204
Daniel Veillard23f05e02013-02-19 10:21:49 +0800205 if (replacement != 0) {
206 if (replacement < XML_MAX_TEXT_LENGTH)
207 return(0);
208
209 /*
210 * If the volume of entity copy reaches 10 times the
211 * amount of parsed data and over the large text threshold
212 * then that's very likely to be an abuse.
213 */
214 if (ctxt->input != NULL) {
215 consumed = ctxt->input->consumed +
216 (ctxt->input->cur - ctxt->input->base);
217 }
218 consumed += ctxt->sizeentities;
219
220 if (replacement < XML_PARSER_NON_LINEAR * consumed)
221 return(0);
222 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000223 /*
224 * Do the check based on the replacement size of the entity
225 */
226 if (size < XML_PARSER_BIG_ENTITY)
227 return(0);
228
229 /*
230 * A limit on the amount of text data reasonably used
231 */
232 if (ctxt->input != NULL) {
233 consumed = ctxt->input->consumed +
234 (ctxt->input->cur - ctxt->input->base);
235 }
236 consumed += ctxt->sizeentities;
237
238 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
239 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
240 return (0);
241 } else if (ent != NULL) {
242 /*
243 * use the number of parsed entities in the replacement
244 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800245 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000246
247 /*
248 * The amount of data parsed counting entities size only once
249 */
250 if (ctxt->input != NULL) {
251 consumed = ctxt->input->consumed +
252 (ctxt->input->cur - ctxt->input->base);
253 }
254 consumed += ctxt->sizeentities;
255
256 /*
257 * Check the density of entities for the amount of data
258 * knowing an entity reference will take at least 3 bytes
259 */
260 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
261 return (0);
262 } else {
263 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800264 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000265 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800266 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
267 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
268 (ctxt->nbentities <= 10000))
269 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000270 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000271 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
272 return (1);
273}
274
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000284
Daniel Veillard0fb18932003-09-07 09:14:37 +0000285
Daniel Veillard0161e632008-08-28 15:36:32 +0000286
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
302
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is terminated by a NULL entry and is never modified, so both
 * the strings and the pointer slots are const.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
312
Daniel Veillarda07050d2003-10-19 14:46:32 +0000313
Owen Taylor3473f882001-02-23 17:55:21 +0000314/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200315static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
316 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000317
Daniel Veillard7d515752003-09-26 19:12:37 +0000318static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000319xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
320 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000321 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000322 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000323
Daniel Veillard37334572008-07-31 08:20:02 +0000324static int
325xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
326 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000327#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000328static void
329xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
330 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000331#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000332
Daniel Veillard7d515752003-09-26 19:12:37 +0000333static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000334xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
335 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000336
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000337static int
338xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
339
/************************************************************************
 *                                                                      *
 *              Some factorized error routines                          *
 *                                                                      *
 ************************************************************************/
345
346/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000347 * xmlErrAttributeDup:
348 * @ctxt: an XML parser context
349 * @prefix: the attribute prefix
350 * @localname: the attribute localname
351 *
352 * Handle a redefinition of attribute error
353 */
354static void
355xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
356 const xmlChar * localname)
357{
Daniel Veillard157fee02003-10-31 10:36:03 +0000358 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
359 (ctxt->instate == XML_PARSER_EOF))
360 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000361 if (ctxt != NULL)
362 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200363
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000364 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000365 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200366 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 (const char *) localname, NULL, NULL, 0, 0,
368 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000369 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000370 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200371 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 (const char *) prefix, (const char *) localname,
373 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
374 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000375 if (ctxt != NULL) {
376 ctxt->wellFormed = 0;
377 if (ctxt->recovery == 0)
378 ctxt->disableSAX = 1;
379 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000380}
381
382/**
383 * xmlFatalErr:
384 * @ctxt: an XML parser context
385 * @error: the error number
386 * @extra: extra information string
387 *
388 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
389 */
390static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000392{
393 const char *errmsg;
394
Daniel Veillard157fee02003-10-31 10:36:03 +0000395 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
396 (ctxt->instate == XML_PARSER_EOF))
397 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000398 switch (error) {
399 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800400 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000401 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000402 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800403 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000405 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800406 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000407 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000408 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000409 errmsg = "internal error";
410 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000411 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800412 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000413 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000414 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800415 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000416 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000417 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800418 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000419 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000420 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800421 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000422 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000423 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800424 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000425 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000426 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800427 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000428 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000429 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800430 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000431 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000432 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800433 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000434 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000435 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800436 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000437 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000438 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800439 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000440 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000441 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800442 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000444 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800445 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000447 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800448 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000449 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000450 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800451 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000452 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000453 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800454 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000455 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000456 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800457 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800460 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800463 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800466 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000468 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800469 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800472 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000473 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000474 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800475 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000476 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000478 errmsg = "Fragment not allowed";
479 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000480 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000492 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000495 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800499 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000500 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000501 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000502 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800503 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000504 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000505 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800506 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000507 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000508 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800509 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000510 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000511 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800512 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000513 break;
514 case XML_ERR_CONDSEC_INVALID_KEYWORD:
515 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800516 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000517 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000518 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800519 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000520 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000521 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800522 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000523 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000524 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800525 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000526 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000527 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800528 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000529 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000530 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800531 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000532 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000533 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800534 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000535 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000536 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800537 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000538 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000539 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800540 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000541 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000542 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800543 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000544 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000545 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800546 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000547 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000548 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800549 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000550 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000551 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800552 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000553 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000554 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800555 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000556 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000557 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800558 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000559 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000560 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800561 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000562 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000563 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800564 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000565 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000566 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800567 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000568 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800569 case XML_ERR_NAME_TOO_LONG:
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +0200570 errmsg = "Name too long";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800571 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000572#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000573 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800574 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000575 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000576#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000577 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800578 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000579 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000580 if (ctxt != NULL)
581 ctxt->errNo = error;
David Kilzer4472c3a2016-05-13 15:13:17 +0800582 if (info == NULL) {
583 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
584 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
585 errmsg);
586 } else {
587 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
588 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
589 errmsg, info);
590 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000591 if (ctxt != NULL) {
592 ctxt->wellFormed = 0;
593 if (ctxt->recovery == 0)
594 ctxt->disableSAX = 1;
595 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000596}
597
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000598/**
599 * xmlFatalErrMsg:
600 * @ctxt: an XML parser context
601 * @error: the error number
602 * @msg: the error message
603 *
604 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
605 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800606static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000607xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
608 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000609{
Daniel Veillard157fee02003-10-31 10:36:03 +0000610 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
611 (ctxt->instate == XML_PARSER_EOF))
612 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000613 if (ctxt != NULL)
614 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000615 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200616 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000617 if (ctxt != NULL) {
618 ctxt->wellFormed = 0;
619 if (ctxt->recovery == 0)
620 ctxt->disableSAX = 1;
621 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000622}
623
624/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000625 * xmlWarningMsg:
626 * @ctxt: an XML parser context
627 * @error: the error number
628 * @msg: the error message
629 * @str1: extra data
630 * @str2: extra data
631 *
632 * Handle a warning.
633 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800634static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000635xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
636 const char *msg, const xmlChar *str1, const xmlChar *str2)
637{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000638 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000639
Daniel Veillard157fee02003-10-31 10:36:03 +0000640 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641 (ctxt->instate == XML_PARSER_EOF))
642 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000643 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
644 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000645 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200646 if (ctxt != NULL) {
647 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000648 (ctxt->sax) ? ctxt->sax->warning : NULL,
649 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000650 ctxt, NULL, XML_FROM_PARSER, error,
651 XML_ERR_WARNING, NULL, 0,
652 (const char *) str1, (const char *) str2, NULL, 0, 0,
653 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200654 } else {
655 __xmlRaiseError(schannel, NULL, NULL,
656 ctxt, NULL, XML_FROM_PARSER, error,
657 XML_ERR_WARNING, NULL, 0,
658 (const char *) str1, (const char *) str2, NULL, 0, 0,
659 msg, (const char *) str1, (const char *) str2);
660 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000661}
662
663/**
664 * xmlValidityError:
665 * @ctxt: an XML parser context
666 * @error: the error number
667 * @msg: the error message
668 * @str1: extra data
669 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000670 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000671 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800672static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000673xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000674 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000675{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000676 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000677
678 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
679 (ctxt->instate == XML_PARSER_EOF))
680 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000681 if (ctxt != NULL) {
682 ctxt->errNo = error;
683 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
684 schannel = ctxt->sax->serror;
685 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200686 if (ctxt != NULL) {
687 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000688 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000689 ctxt, NULL, XML_FROM_DTD, error,
690 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000691 (const char *) str2, NULL, 0, 0,
692 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000693 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200694 } else {
695 __xmlRaiseError(schannel, NULL, NULL,
696 ctxt, NULL, XML_FROM_DTD, error,
697 XML_ERR_ERROR, NULL, 0, (const char *) str1,
698 (const char *) str2, NULL, 0, 0,
699 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000700 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000701}
702
703/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000704 * xmlFatalErrMsgInt:
705 * @ctxt: an XML parser context
706 * @error: the error number
707 * @msg: the error message
708 * @val: an integer value
709 *
710 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
711 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800712static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000713xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000714 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000715{
Daniel Veillard157fee02003-10-31 10:36:03 +0000716 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
717 (ctxt->instate == XML_PARSER_EOF))
718 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000719 if (ctxt != NULL)
720 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000721 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000722 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
723 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000724 if (ctxt != NULL) {
725 ctxt->wellFormed = 0;
726 if (ctxt->recovery == 0)
727 ctxt->disableSAX = 1;
728 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000729}
730
731/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000732 * xmlFatalErrMsgStrIntStr:
733 * @ctxt: an XML parser context
734 * @error: the error number
735 * @msg: the error message
736 * @str1: an string info
737 * @val: an integer value
738 * @str2: an string info
739 *
740 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
741 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800742static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000743xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800744 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000745 const xmlChar *str2)
746{
Daniel Veillard157fee02003-10-31 10:36:03 +0000747 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
748 (ctxt->instate == XML_PARSER_EOF))
749 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000750 if (ctxt != NULL)
751 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000752 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000753 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
754 NULL, 0, (const char *) str1, (const char *) str2,
755 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000756 if (ctxt != NULL) {
757 ctxt->wellFormed = 0;
758 if (ctxt->recovery == 0)
759 ctxt->disableSAX = 1;
760 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000761}
762
763/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000764 * xmlFatalErrMsgStr:
765 * @ctxt: an XML parser context
766 * @error: the error number
767 * @msg: the error message
768 * @val: a string value
769 *
770 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
771 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800772static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000773xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000774 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000775{
Daniel Veillard157fee02003-10-31 10:36:03 +0000776 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
777 (ctxt->instate == XML_PARSER_EOF))
778 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000779 if (ctxt != NULL)
780 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000781 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000782 XML_FROM_PARSER, error, XML_ERR_FATAL,
783 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
784 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000785 if (ctxt != NULL) {
786 ctxt->wellFormed = 0;
787 if (ctxt->recovery == 0)
788 ctxt->disableSAX = 1;
789 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000790}
791
792/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000793 * xmlErrMsgStr:
794 * @ctxt: an XML parser context
795 * @error: the error number
796 * @msg: the error message
797 * @val: a string value
798 *
799 * Handle a non fatal parser error
800 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800801static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000802xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
803 const char *msg, const xmlChar * val)
804{
Daniel Veillard157fee02003-10-31 10:36:03 +0000805 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
806 (ctxt->instate == XML_PARSER_EOF))
807 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000808 if (ctxt != NULL)
809 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000810 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000811 XML_FROM_PARSER, error, XML_ERR_ERROR,
812 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
813 val);
814}
815
816/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000817 * xmlNsErr:
818 * @ctxt: an XML parser context
819 * @error: the error number
820 * @msg: the message
821 * @info1: extra information string
822 * @info2: extra information string
823 *
824 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
825 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800826static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000827xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
828 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000829 const xmlChar * info1, const xmlChar * info2,
830 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000831{
Daniel Veillard157fee02003-10-31 10:36:03 +0000832 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
833 (ctxt->instate == XML_PARSER_EOF))
834 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000835 if (ctxt != NULL)
836 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000837 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000838 XML_ERR_ERROR, NULL, 0, (const char *) info1,
839 (const char *) info2, (const char *) info3, 0, 0, msg,
840 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000841 if (ctxt != NULL)
842 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000843}
844
Daniel Veillard37334572008-07-31 08:20:02 +0000845/**
846 * xmlNsWarn
847 * @ctxt: an XML parser context
848 * @error: the error number
849 * @msg: the message
850 * @info1: extra information string
851 * @info2: extra information string
852 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800853 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000854 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800855static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard37334572008-07-31 08:20:02 +0000856xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
857 const char *msg,
858 const xmlChar * info1, const xmlChar * info2,
859 const xmlChar * info3)
860{
861 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
862 (ctxt->instate == XML_PARSER_EOF))
863 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000864 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
865 XML_ERR_WARNING, NULL, 0, (const char *) info1,
866 (const char *) info2, (const char *) info3, 0, 0, msg,
867 info1, info2, info3);
868}
869
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000870/************************************************************************
871 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800872 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000873 * *
874 ************************************************************************/
875
876/**
877 * xmlHasFeature:
878 * @feature: the feature to be examined
879 *
880 * Examines if the library has been compiled with a given feature.
881 *
882 * Returns a non-zero value if the feature exist, otherwise zero.
883 * Returns zero (0) if the feature does not exist or an unknown
884 * unknown feature is requested, non-zero otherwise.
885 */
886int
887xmlHasFeature(xmlFeature feature)
888{
889 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000890 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000891#ifdef LIBXML_THREAD_ENABLED
892 return(1);
893#else
894 return(0);
895#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000896 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000897#ifdef LIBXML_TREE_ENABLED
898 return(1);
899#else
900 return(0);
901#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000902 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000903#ifdef LIBXML_OUTPUT_ENABLED
904 return(1);
905#else
906 return(0);
907#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000908 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000909#ifdef LIBXML_PUSH_ENABLED
910 return(1);
911#else
912 return(0);
913#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000914 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000915#ifdef LIBXML_READER_ENABLED
916 return(1);
917#else
918 return(0);
919#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000920 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000921#ifdef LIBXML_PATTERN_ENABLED
922 return(1);
923#else
924 return(0);
925#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000926 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000927#ifdef LIBXML_WRITER_ENABLED
928 return(1);
929#else
930 return(0);
931#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000932 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000933#ifdef LIBXML_SAX1_ENABLED
934 return(1);
935#else
936 return(0);
937#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000938 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000939#ifdef LIBXML_FTP_ENABLED
940 return(1);
941#else
942 return(0);
943#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000944 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000945#ifdef LIBXML_HTTP_ENABLED
946 return(1);
947#else
948 return(0);
949#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000950 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000951#ifdef LIBXML_VALID_ENABLED
952 return(1);
953#else
954 return(0);
955#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000956 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000957#ifdef LIBXML_HTML_ENABLED
958 return(1);
959#else
960 return(0);
961#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000962 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000963#ifdef LIBXML_LEGACY_ENABLED
964 return(1);
965#else
966 return(0);
967#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000968 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000969#ifdef LIBXML_C14N_ENABLED
970 return(1);
971#else
972 return(0);
973#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000974 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000975#ifdef LIBXML_CATALOG_ENABLED
976 return(1);
977#else
978 return(0);
979#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000980 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000981#ifdef LIBXML_XPATH_ENABLED
982 return(1);
983#else
984 return(0);
985#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000986 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000987#ifdef LIBXML_XPTR_ENABLED
988 return(1);
989#else
990 return(0);
991#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000992 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000993#ifdef LIBXML_XINCLUDE_ENABLED
994 return(1);
995#else
996 return(0);
997#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000998 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000999#ifdef LIBXML_ICONV_ENABLED
1000 return(1);
1001#else
1002 return(0);
1003#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001004 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001005#ifdef LIBXML_ISO8859X_ENABLED
1006 return(1);
1007#else
1008 return(0);
1009#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001010 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001011#ifdef LIBXML_UNICODE_ENABLED
1012 return(1);
1013#else
1014 return(0);
1015#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001016 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001017#ifdef LIBXML_REGEXP_ENABLED
1018 return(1);
1019#else
1020 return(0);
1021#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001022 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001023#ifdef LIBXML_AUTOMATA_ENABLED
1024 return(1);
1025#else
1026 return(0);
1027#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001028 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001029#ifdef LIBXML_EXPR_ENABLED
1030 return(1);
1031#else
1032 return(0);
1033#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001034 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001035#ifdef LIBXML_SCHEMAS_ENABLED
1036 return(1);
1037#else
1038 return(0);
1039#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001040 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001041#ifdef LIBXML_SCHEMATRON_ENABLED
1042 return(1);
1043#else
1044 return(0);
1045#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001046 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001047#ifdef LIBXML_MODULES_ENABLED
1048 return(1);
1049#else
1050 return(0);
1051#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001052 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001053#ifdef LIBXML_DEBUG_ENABLED
1054 return(1);
1055#else
1056 return(0);
1057#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001058 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001059#ifdef DEBUG_MEMORY_LOCATION
1060 return(1);
1061#else
1062 return(0);
1063#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001064 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001065#ifdef LIBXML_DEBUG_RUNTIME
1066 return(1);
1067#else
1068 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001069#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001070 case XML_WITH_ZLIB:
1071#ifdef LIBXML_ZLIB_ENABLED
1072 return(1);
1073#else
1074 return(0);
1075#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001076 case XML_WITH_LZMA:
1077#ifdef LIBXML_LZMA_ENABLED
1078 return(1);
1079#else
1080 return(0);
1081#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001082 case XML_WITH_ICU:
1083#ifdef LIBXML_ICU_ENABLED
1084 return(1);
1085#else
1086 return(0);
1087#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001088 default:
1089 break;
1090 }
1091 return(0);
1092}
1093
1094/************************************************************************
1095 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001096 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001097 * *
1098 ************************************************************************/
1099
1100/**
1101 * xmlDetectSAX2:
1102 * @ctxt: an XML parser context
1103 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001104 * Do the SAX2 detection and specific initialization
Daniel Veillarde57ec792003-09-10 10:50:59 +00001105 */
1106static void
1107xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
Haibo Huangf0a546b2020-09-01 20:28:19 -07001108 xmlSAXHandlerPtr sax;
Elliott Hughesecdab2a2022-02-23 14:33:50 -08001109
1110 /* Avoid unused variable warning if features are disabled. */
1111 (void) sax;
1112
Daniel Veillarde57ec792003-09-10 10:50:59 +00001113 if (ctxt == NULL) return;
Haibo Huangf0a546b2020-09-01 20:28:19 -07001114 sax = ctxt->sax;
Daniel Veillard81273902003-09-30 00:43:48 +00001115#ifdef LIBXML_SAX1_ENABLED
Haibo Huangf0a546b2020-09-01 20:28:19 -07001116 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
1117 ((sax->startElementNs != NULL) ||
1118 (sax->endElementNs != NULL) ||
1119 ((sax->startElement == NULL) && (sax->endElement == NULL))))
1120 ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001121#else
1122 ctxt->sax2 = 1;
1123#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001124
1125 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1126 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1127 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001128 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1129 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001130 xmlErrMemory(ctxt, NULL);
1131 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001132}
1133
Daniel Veillarde57ec792003-09-10 10:50:59 +00001134typedef struct _xmlDefAttrs xmlDefAttrs;
1135typedef xmlDefAttrs *xmlDefAttrsPtr;
1136struct _xmlDefAttrs {
1137 int nbAttrs; /* number of defaulted attributes on that element */
1138 int maxAttrs; /* the size of the array */
Nick Wellnhofer47496722017-05-31 16:46:39 +02001139#if __STDC_VERSION__ >= 199901L
1140 /* Using a C99 flexible array member avoids UBSan errors. */
1141 const xmlChar *values[]; /* array of localname/prefix/values/external */
1142#else
1143 const xmlChar *values[5];
1144#endif
Daniel Veillarde57ec792003-09-10 10:50:59 +00001145};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001146
1147/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001148 * xmlAttrNormalizeSpace:
1149 * @src: the source string
1150 * @dst: the target string
1151 *
1152 * Normalize the space in non CDATA attribute values:
1153 * If the attribute type is not CDATA, then the XML processor MUST further
1154 * process the normalized attribute value by discarding any leading and
1155 * trailing space (#x20) characters, and by replacing sequences of space
1156 * (#x20) characters by a single space (#x20) character.
1157 * Note that the size of dst need to be at least src, and if one doesn't need
1158 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1159 * passing src as dst is just fine.
1160 *
1161 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1162 * is needed.
1163 */
1164static xmlChar *
1165xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1166{
1167 if ((src == NULL) || (dst == NULL))
1168 return(NULL);
1169
1170 while (*src == 0x20) src++;
1171 while (*src != 0) {
1172 if (*src == 0x20) {
1173 while (*src == 0x20) src++;
1174 if (*src != 0)
1175 *dst++ = 0x20;
1176 } else {
1177 *dst++ = *src++;
1178 }
1179 }
1180 *dst = 0;
1181 if (dst == src)
1182 return(NULL);
1183 return(dst);
1184}
1185
1186/**
1187 * xmlAttrNormalizeSpace2:
1188 * @src: the source string
1189 *
1190 * Normalize the space in non CDATA attribute values, a slightly more complex
1191 * front end to avoid allocation problems when running on attribute values
1192 * coming from the input.
1193 *
1194 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1195 * is needed.
1196 */
1197static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001198xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001199{
1200 int i;
1201 int remove_head = 0;
1202 int need_realloc = 0;
1203 const xmlChar *cur;
1204
1205 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1206 return(NULL);
1207 i = *len;
1208 if (i <= 0)
1209 return(NULL);
1210
1211 cur = src;
1212 while (*cur == 0x20) {
1213 cur++;
1214 remove_head++;
1215 }
1216 while (*cur != 0) {
1217 if (*cur == 0x20) {
1218 cur++;
1219 if ((*cur == 0x20) || (*cur == 0)) {
1220 need_realloc = 1;
1221 break;
1222 }
1223 } else
1224 cur++;
1225 }
1226 if (need_realloc) {
1227 xmlChar *ret;
1228
1229 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1230 if (ret == NULL) {
1231 xmlErrMemory(ctxt, NULL);
1232 return(NULL);
1233 }
1234 xmlAttrNormalizeSpace(ret, ret);
1235 *len = (int) strlen((const char *)ret);
1236 return(ret);
1237 } else if (remove_head) {
1238 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001239 memmove(src, src + remove_head, 1 + *len);
1240 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001241 }
1242 return(NULL);
1243}
1244
1245/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001246 * xmlAddDefAttrs:
1247 * @ctxt: an XML parser context
1248 * @fullname: the element fullname
1249 * @fullattr: the attribute fullname
1250 * @value: the attribute value
1251 *
1252 * Add a defaulted attribute for an element
1253 */
1254static void
1255xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1256 const xmlChar *fullname,
1257 const xmlChar *fullattr,
1258 const xmlChar *value) {
1259 xmlDefAttrsPtr defaults;
1260 int len;
1261 const xmlChar *name;
1262 const xmlChar *prefix;
1263
Daniel Veillard6a31b832008-03-26 14:06:44 +00001264 /*
1265 * Allows to detect attribute redefinitions
1266 */
1267 if (ctxt->attsSpecial != NULL) {
1268 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1269 return;
1270 }
1271
Daniel Veillarde57ec792003-09-10 10:50:59 +00001272 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001273 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001274 if (ctxt->attsDefault == NULL)
1275 goto mem_error;
1276 }
1277
1278 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001279 * split the element name into prefix:localname , the string found
1280 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001281 */
1282 name = xmlSplitQName3(fullname, &len);
1283 if (name == NULL) {
1284 name = xmlDictLookup(ctxt->dict, fullname, -1);
1285 prefix = NULL;
1286 } else {
1287 name = xmlDictLookup(ctxt->dict, name, -1);
1288 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1289 }
1290
1291 /*
1292 * make sure there is some storage
1293 */
1294 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1295 if (defaults == NULL) {
1296 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001297 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001298 if (defaults == NULL)
1299 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001300 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001301 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001302 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1303 defaults, NULL) < 0) {
1304 xmlFree(defaults);
1305 goto mem_error;
1306 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001307 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001308 xmlDefAttrsPtr temp;
1309
1310 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001311 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001312 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001313 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001314 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001315 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001316 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1317 defaults, NULL) < 0) {
1318 xmlFree(defaults);
1319 goto mem_error;
1320 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001321 }
1322
1323 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001324 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001325 * are within the DTD and hen not associated to namespace names.
1326 */
1327 name = xmlSplitQName3(fullattr, &len);
1328 if (name == NULL) {
1329 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1330 prefix = NULL;
1331 } else {
1332 name = xmlDictLookup(ctxt->dict, name, -1);
1333 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1334 }
1335
Daniel Veillardae0765b2008-07-31 19:54:59 +00001336 defaults->values[5 * defaults->nbAttrs] = name;
1337 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001338 /* intern the string and precompute the end */
1339 len = xmlStrlen(value);
1340 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001341 defaults->values[5 * defaults->nbAttrs + 2] = value;
1342 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1343 if (ctxt->external)
1344 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1345 else
1346 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001347 defaults->nbAttrs++;
1348
1349 return;
1350
1351mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001352 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001353 return;
1354}
1355
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001356/**
1357 * xmlAddSpecialAttr:
1358 * @ctxt: an XML parser context
1359 * @fullname: the element fullname
1360 * @fullattr: the attribute fullname
1361 * @type: the attribute type
1362 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001363 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001364 */
1365static void
1366xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1367 const xmlChar *fullname,
1368 const xmlChar *fullattr,
1369 int type)
1370{
1371 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001372 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001373 if (ctxt->attsSpecial == NULL)
1374 goto mem_error;
1375 }
1376
Daniel Veillardac4118d2008-01-11 05:27:32 +00001377 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1378 return;
1379
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001380 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
Nick Wellnhoferd422b952017-10-09 13:37:42 +02001381 (void *) (ptrdiff_t) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001382 return;
1383
1384mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001385 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001386 return;
1387}
1388
Daniel Veillard4432df22003-09-28 18:58:27 +00001389/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001390 * xmlCleanSpecialAttrCallback:
1391 *
1392 * Removes CDATA attributes from the special attribute table
1393 */
1394static void
1395xmlCleanSpecialAttrCallback(void *payload, void *data,
1396 const xmlChar *fullname, const xmlChar *fullattr,
1397 const xmlChar *unused ATTRIBUTE_UNUSED) {
1398 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1399
Nick Wellnhoferd422b952017-10-09 13:37:42 +02001400 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001401 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1402 }
1403}
1404
1405/**
1406 * xmlCleanSpecialAttr:
1407 * @ctxt: an XML parser context
1408 *
1409 * Trim the list of attributes defined to remove all those of type
1410 * CDATA as they are not special. This call should be done when finishing
1411 * to parse the DTD and before starting to parse the document root.
1412 */
1413static void
1414xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1415{
1416 if (ctxt->attsSpecial == NULL)
1417 return;
1418
1419 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1420
1421 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1422 xmlHashFree(ctxt->attsSpecial, NULL);
1423 ctxt->attsSpecial = NULL;
1424 }
1425 return;
1426}
1427
1428/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001429 * xmlCheckLanguageID:
1430 * @lang: pointer to the string value
1431 *
1432 * Checks that the value conforms to the LanguageID production:
1433 *
1434 * NOTE: this is somewhat deprecated, those productions were removed from
1435 * the XML Second edition.
1436 *
1437 * [33] LanguageID ::= Langcode ('-' Subcode)*
1438 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1439 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1440 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1441 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1442 * [38] Subcode ::= ([a-z] | [A-Z])+
1443 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001444 * The current REC reference the successors of RFC 1766, currently 5646
Daniel Veillard60587d62010-11-04 15:16:27 +01001445 *
1446 * http://www.rfc-editor.org/rfc/rfc5646.txt
1447 * langtag = language
1448 * ["-" script]
1449 * ["-" region]
1450 * *("-" variant)
1451 * *("-" extension)
1452 * ["-" privateuse]
1453 * language = 2*3ALPHA ; shortest ISO 639 code
1454 * ["-" extlang] ; sometimes followed by
1455 * ; extended language subtags
1456 * / 4ALPHA ; or reserved for future use
1457 * / 5*8ALPHA ; or registered language subtag
1458 *
1459 * extlang = 3ALPHA ; selected ISO 639 codes
1460 * *2("-" 3ALPHA) ; permanently reserved
1461 *
1462 * script = 4ALPHA ; ISO 15924 code
1463 *
1464 * region = 2ALPHA ; ISO 3166-1 code
1465 * / 3DIGIT ; UN M.49 code
1466 *
1467 * variant = 5*8alphanum ; registered variants
1468 * / (DIGIT 3alphanum)
1469 *
1470 * extension = singleton 1*("-" (2*8alphanum))
1471 *
1472 * ; Single alphanumerics
1473 * ; "x" reserved for private use
1474 * singleton = DIGIT ; 0 - 9
1475 * / %x41-57 ; A - W
1476 * / %x59-5A ; Y - Z
1477 * / %x61-77 ; a - w
1478 * / %x79-7A ; y - z
1479 *
1480 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1481 * The parser below doesn't try to cope with extension or privateuse
1482 * that could be added but that's not interoperable anyway
1483 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001484 * Returns 1 if correct 0 otherwise
1485 **/
1486int
1487xmlCheckLanguageID(const xmlChar * lang)
1488{
Daniel Veillard60587d62010-11-04 15:16:27 +01001489 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001490
1491 if (cur == NULL)
1492 return (0);
1493 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001494 ((cur[0] == 'I') && (cur[1] == '-')) ||
1495 ((cur[0] == 'x') && (cur[1] == '-')) ||
1496 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001497 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001498 * Still allow IANA code and user code which were coming
1499 * from the previous version of the XML-1.0 specification
1500 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001501 */
1502 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001503 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001504 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1505 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001506 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001507 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001508 nxt = cur;
1509 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1510 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1511 nxt++;
1512 if (nxt - cur >= 4) {
1513 /*
1514 * Reserved
1515 */
1516 if ((nxt - cur > 8) || (nxt[0] != 0))
1517 return(0);
1518 return(1);
1519 }
1520 if (nxt - cur < 2)
1521 return(0);
1522 /* we got an ISO 639 code */
1523 if (nxt[0] == 0)
1524 return(1);
1525 if (nxt[0] != '-')
1526 return(0);
1527
1528 nxt++;
1529 cur = nxt;
1530 /* now we can have extlang or script or region or variant */
1531 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1532 goto region_m49;
1533
1534 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1535 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1536 nxt++;
1537 if (nxt - cur == 4)
1538 goto script;
1539 if (nxt - cur == 2)
1540 goto region;
1541 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1542 goto variant;
1543 if (nxt - cur != 3)
1544 return(0);
1545 /* we parsed an extlang */
1546 if (nxt[0] == 0)
1547 return(1);
1548 if (nxt[0] != '-')
1549 return(0);
1550
1551 nxt++;
1552 cur = nxt;
1553 /* now we can have script or region or variant */
1554 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1555 goto region_m49;
1556
1557 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1558 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1559 nxt++;
1560 if (nxt - cur == 2)
1561 goto region;
1562 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1563 goto variant;
1564 if (nxt - cur != 4)
1565 return(0);
1566 /* we parsed a script */
1567script:
1568 if (nxt[0] == 0)
1569 return(1);
1570 if (nxt[0] != '-')
1571 return(0);
1572
1573 nxt++;
1574 cur = nxt;
1575 /* now we can have region or variant */
1576 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1577 goto region_m49;
1578
1579 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1580 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1581 nxt++;
1582
1583 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1584 goto variant;
1585 if (nxt - cur != 2)
1586 return(0);
1587 /* we parsed a region */
1588region:
1589 if (nxt[0] == 0)
1590 return(1);
1591 if (nxt[0] != '-')
1592 return(0);
1593
1594 nxt++;
1595 cur = nxt;
1596 /* now we can just have a variant */
1597 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1598 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1599 nxt++;
1600
1601 if ((nxt - cur < 5) || (nxt - cur > 8))
1602 return(0);
1603
1604 /* we parsed a variant */
1605variant:
1606 if (nxt[0] == 0)
1607 return(1);
1608 if (nxt[0] != '-')
1609 return(0);
1610 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001611 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001612
1613region_m49:
1614 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1615 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1616 nxt += 3;
1617 goto region;
1618 }
1619 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001620}
1621
Owen Taylor3473f882001-02-23 17:55:21 +00001622/************************************************************************
1623 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001624 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001625 * *
1626 ************************************************************************/
1627
Daniel Veillard8ed10722009-08-20 19:17:36 +02001628static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1629 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001630
Daniel Veillard0fb18932003-09-07 09:14:37 +00001631#ifdef SAX2
1632/**
1633 * nsPush:
1634 * @ctxt: an XML parser context
1635 * @prefix: the namespace prefix or NULL
1636 * @URL: the namespace name
1637 *
1638 * Pushes a new parser namespace on top of the ns stack
1639 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001640 * Returns -1 in case of error, -2 if the namespace should be discarded
1641 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001642 */
1643static int
1644nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1645{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001646 if (ctxt->options & XML_PARSE_NSCLEAN) {
1647 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001648 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001649 if (ctxt->nsTab[i] == prefix) {
1650 /* in scope */
1651 if (ctxt->nsTab[i + 1] == URL)
1652 return(-2);
1653 /* out of scope keep it */
1654 break;
1655 }
1656 }
1657 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001658 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1659 ctxt->nsMax = 10;
1660 ctxt->nsNr = 0;
1661 ctxt->nsTab = (const xmlChar **)
1662 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1663 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001664 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001665 ctxt->nsMax = 0;
1666 return (-1);
1667 }
1668 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001669 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001670 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001671 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1672 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1673 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001674 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001675 ctxt->nsMax /= 2;
1676 return (-1);
1677 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001678 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001679 }
1680 ctxt->nsTab[ctxt->nsNr++] = prefix;
1681 ctxt->nsTab[ctxt->nsNr++] = URL;
1682 return (ctxt->nsNr);
1683}
1684/**
1685 * nsPop:
1686 * @ctxt: an XML parser context
1687 * @nr: the number to pop
1688 *
1689 * Pops the top @nr parser prefix/namespace from the ns stack
1690 *
1691 * Returns the number of namespaces removed
1692 */
1693static int
1694nsPop(xmlParserCtxtPtr ctxt, int nr)
1695{
1696 int i;
1697
1698 if (ctxt->nsTab == NULL) return(0);
1699 if (ctxt->nsNr < nr) {
1700 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1701 nr = ctxt->nsNr;
1702 }
1703 if (ctxt->nsNr <= 0)
1704 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001705
Daniel Veillard0fb18932003-09-07 09:14:37 +00001706 for (i = 0;i < nr;i++) {
1707 ctxt->nsNr--;
1708 ctxt->nsTab[ctxt->nsNr] = NULL;
1709 }
1710 return(nr);
1711}
1712#endif
1713
1714static int
1715xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1716 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001717 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001718 int maxatts;
1719
1720 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001721 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001722 atts = (const xmlChar **)
1723 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001724 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001725 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001726 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1727 if (attallocs == NULL) goto mem_error;
1728 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001729 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001730 } else if (nr + 5 > ctxt->maxatts) {
1731 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001732 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1733 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001734 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001735 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001736 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1737 (maxatts / 5) * sizeof(int));
1738 if (attallocs == NULL) goto mem_error;
1739 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001740 ctxt->maxatts = maxatts;
1741 }
1742 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001743mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001744 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001745 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001746}
1747
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001748/**
1749 * inputPush:
1750 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001751 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001752 *
1753 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001754 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001755 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001756 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001757int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001758inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1759{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001760 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001761 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001762 if (ctxt->inputNr >= ctxt->inputMax) {
1763 ctxt->inputMax *= 2;
1764 ctxt->inputTab =
1765 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1766 ctxt->inputMax *
1767 sizeof(ctxt->inputTab[0]));
1768 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001769 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001770 xmlFreeInputStream(value);
1771 ctxt->inputMax /= 2;
1772 value = NULL;
1773 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001774 }
1775 }
1776 ctxt->inputTab[ctxt->inputNr] = value;
1777 ctxt->input = value;
1778 return (ctxt->inputNr++);
1779}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001780/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001781 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001782 * @ctxt: an XML parser context
1783 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001784 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001785 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001786 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001787 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001788xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001789inputPop(xmlParserCtxtPtr ctxt)
1790{
1791 xmlParserInputPtr ret;
1792
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001793 if (ctxt == NULL)
1794 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001795 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001796 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001797 ctxt->inputNr--;
1798 if (ctxt->inputNr > 0)
1799 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1800 else
1801 ctxt->input = NULL;
1802 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001803 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001804 return (ret);
1805}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001806/**
1807 * nodePush:
1808 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001809 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001810 *
1811 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001812 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001813 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001814 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001815int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001816nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1817{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001818 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001819 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001820 xmlNodePtr *tmp;
1821
1822 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1823 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001824 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001825 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001826 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001827 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001828 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001829 ctxt->nodeTab = tmp;
1830 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001831 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001832 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1833 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001834 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001835 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001836 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001837 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001838 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001839 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001840 ctxt->nodeTab[ctxt->nodeNr] = value;
1841 ctxt->node = value;
1842 return (ctxt->nodeNr++);
1843}
Daniel Veillard8915c152008-08-26 13:05:34 +00001844
Daniel Veillard1c732d22002-11-30 11:22:59 +00001845/**
1846 * nodePop:
1847 * @ctxt: an XML parser context
1848 *
1849 * Pops the top element node from the node stack
1850 *
1851 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001852 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001853xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001854nodePop(xmlParserCtxtPtr ctxt)
1855{
1856 xmlNodePtr ret;
1857
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001858 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001859 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001860 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001861 ctxt->nodeNr--;
1862 if (ctxt->nodeNr > 0)
1863 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1864 else
1865 ctxt->node = NULL;
1866 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001867 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001868 return (ret);
1869}
Daniel Veillarda2351322004-06-27 12:08:10 +00001870
Daniel Veillard1c732d22002-11-30 11:22:59 +00001871/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001872 * nameNsPush:
1873 * @ctxt: an XML parser context
1874 * @value: the element name
1875 * @prefix: the element prefix
1876 * @URI: the element namespace name
Elliott Hughese54f00d2021-05-13 08:13:46 -07001877 * @line: the current line number for error messages
1878 * @nsNr: the number of namespaces pushed on the namespace table
Daniel Veillarde57ec792003-09-10 10:50:59 +00001879 *
1880 * Pushes a new element name/prefix/URL on top of the name stack
1881 *
1882 * Returns -1 in case of error, the index in the stack otherwise
1883 */
1884static int
1885nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
Elliott Hughese54f00d2021-05-13 08:13:46 -07001886 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001887{
Elliott Hughese54f00d2021-05-13 08:13:46 -07001888 xmlStartTag *tag;
1889
Daniel Veillarde57ec792003-09-10 10:50:59 +00001890 if (ctxt->nameNr >= ctxt->nameMax) {
1891 const xmlChar * *tmp;
Elliott Hughese54f00d2021-05-13 08:13:46 -07001892 xmlStartTag *tmp2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001893 ctxt->nameMax *= 2;
1894 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1895 ctxt->nameMax *
1896 sizeof(ctxt->nameTab[0]));
1897 if (tmp == NULL) {
1898 ctxt->nameMax /= 2;
1899 goto mem_error;
1900 }
1901 ctxt->nameTab = tmp;
Elliott Hughese54f00d2021-05-13 08:13:46 -07001902 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1903 ctxt->nameMax *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001904 sizeof(ctxt->pushTab[0]));
1905 if (tmp2 == NULL) {
1906 ctxt->nameMax /= 2;
1907 goto mem_error;
1908 }
1909 ctxt->pushTab = tmp2;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001910 } else if (ctxt->pushTab == NULL) {
Elliott Hughese54f00d2021-05-13 08:13:46 -07001911 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001912 sizeof(ctxt->pushTab[0]));
1913 if (ctxt->pushTab == NULL)
1914 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001915 }
1916 ctxt->nameTab[ctxt->nameNr] = value;
1917 ctxt->name = value;
Elliott Hughese54f00d2021-05-13 08:13:46 -07001918 tag = &ctxt->pushTab[ctxt->nameNr];
1919 tag->prefix = prefix;
1920 tag->URI = URI;
1921 tag->line = line;
1922 tag->nsNr = nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001923 return (ctxt->nameNr++);
1924mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001925 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001926 return (-1);
1927}
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001928#ifdef LIBXML_PUSH_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001929/**
1930 * nameNsPop:
1931 * @ctxt: an XML parser context
1932 *
1933 * Pops the top element/prefix/URI name from the name stack
1934 *
1935 * Returns the name just removed
1936 */
1937static const xmlChar *
1938nameNsPop(xmlParserCtxtPtr ctxt)
1939{
1940 const xmlChar *ret;
1941
1942 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001943 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001944 ctxt->nameNr--;
1945 if (ctxt->nameNr > 0)
1946 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1947 else
1948 ctxt->name = NULL;
1949 ret = ctxt->nameTab[ctxt->nameNr];
1950 ctxt->nameTab[ctxt->nameNr] = NULL;
1951 return (ret);
1952}
Daniel Veillarda2351322004-06-27 12:08:10 +00001953#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001954
1955/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001956 * namePush:
1957 * @ctxt: an XML parser context
1958 * @value: the element name
1959 *
1960 * Pushes a new element name on top of the name stack
1961 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001962 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001963 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001964int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001965namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001966{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001967 if (ctxt == NULL) return (-1);
1968
Daniel Veillard1c732d22002-11-30 11:22:59 +00001969 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001970 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001971 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001972 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001973 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001974 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001975 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001976 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001977 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001978 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001979 }
1980 ctxt->nameTab[ctxt->nameNr] = value;
1981 ctxt->name = value;
1982 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001983mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001984 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001985 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001986}
1987/**
1988 * namePop:
1989 * @ctxt: an XML parser context
1990 *
1991 * Pops the top element name from the name stack
1992 *
1993 * Returns the name just removed
1994 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001995const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001996namePop(xmlParserCtxtPtr ctxt)
1997{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001998 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001999
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002000 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2001 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00002002 ctxt->nameNr--;
2003 if (ctxt->nameNr > 0)
2004 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2005 else
2006 ctxt->name = NULL;
2007 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00002008 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00002009 return (ret);
2010}
Owen Taylor3473f882001-02-23 17:55:21 +00002011
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002012static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002013 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00002014 int *tmp;
2015
Owen Taylor3473f882001-02-23 17:55:21 +00002016 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00002017 tmp = (int *) xmlRealloc(ctxt->spaceTab,
2018 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
2019 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002020 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002021 ctxt->spaceMax /=2;
2022 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002023 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00002024 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002025 }
2026 ctxt->spaceTab[ctxt->spaceNr] = val;
2027 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
2028 return(ctxt->spaceNr++);
2029}
2030
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002031static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00002032 int ret;
2033 if (ctxt->spaceNr <= 0) return(0);
2034 ctxt->spaceNr--;
2035 if (ctxt->spaceNr > 0)
2036 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
2037 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00002038 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00002039 ret = ctxt->spaceTab[ctxt->spaceNr];
2040 ctxt->spaceTab[ctxt->spaceNr] = -1;
2041 return(ret);
2042}
2043
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2078
/*
 * Raw access to the current input. All of these expect a parser context
 * named `ctxt` to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at @s are c1 ... cn.
 * The comparison stops at the first mismatch. Every parameter is
 * parenthesized so that an expression such as `p + 1` is converted as a
 * whole instead of having the cast bind to its first operand only.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advances the cursor and the column counter by @val, then
 * asks for more input when the cursor lands on a 0 byte. Line counting
 * is not updated.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): same as SKIP but one xmlChar at a time, keeping the line
 * and column counters in sync when a newline is skipped. @val is
 * evaluated at each iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2120
Daniel Veillarda880b122003-04-21 21:36:41 +00002121#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002122 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2123 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002124 xmlSHRINK (ctxt);
2125
2126static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2127 xmlParserInputShrink(ctxt->input);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002128 if (*ctxt->input->cur == 0)
2129 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2130}
Owen Taylor3473f882001-02-23 17:55:21 +00002131
Daniel Veillarda880b122003-04-21 21:36:41 +00002132#define GROW if ((ctxt->progressive == 0) && \
2133 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002134 xmlGROW (ctxt);
2135
2136static void xmlGROW (xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002137 ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2138 ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
Longstreth Jon190a0b82014-02-06 10:58:17 +01002139
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002140 if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2141 (curBase > XML_MAX_LOOKUP_LIMIT)) &&
Vlad Tsyrklevich28f52fe2017-08-10 15:08:48 -07002142 ((ctxt->input->buf) &&
2143 (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002144 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2145 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002146 xmlHaltParser(ctxt);
2147 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002148 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002149 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002150 if ((ctxt->input->cur > ctxt->input->end) ||
2151 (ctxt->input->cur < ctxt->input->base)) {
2152 xmlHaltParser(ctxt);
2153 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2154 return;
2155 }
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002156 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2157 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillarda880b122003-04-21 21:36:41 +00002158}
Owen Taylor3473f882001-02-23 17:55:21 +00002159
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the current context. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the current context. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, counting one column and never
 * touching the line counter. When the byte then found under the cursor
 * is 0, xmlParserInputGrow() is called with INPUT_CHUNK.
 */
#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->input->col++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }
2170
/*
 * NEXTL: step over one character that occupies l bytes. Only the first
 * byte is inspected for position tracking: a '\n' bumps the line and
 * resets the column to 1, anything else counts as a single column
 * whatever the value of l. No attempt is made to refill the input.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) != '\n') {                                  \
        ctxt->input->col++;                                             \
    } else {                                                            \
        ctxt->input->line++;                                            \
        ctxt->input->col = 1;                                           \
    }                                                                   \
    ctxt->input->cur += (l);                                            \
  } while (0)
2177
/*
 * CUR_CHAR: current character of the parser input, its byte length is
 * stored in l.
 * CUR_SCHAR: same, but reading from the string s instead of the input.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF: append the character v to buffer b at index i and advance i.
 * When l is 1 the value is stored as a single xmlChar, for any other l
 * the encoding is delegated to xmlCopyCharMultiByte() and i advances by
 * its return value.
 * The macro is a single statement (do/while) and its arguments are
 * parenthesized, so it is safe under an unbraced if/else and with
 * expression arguments. i must be a modifiable lvalue.
 */
#define COPY_BUF(l,b,i,v) do {                                          \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v));                   \
  } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +00002184
2185/**
2186 * xmlSkipBlankChars:
2187 * @ctxt: the XML parser context
2188 *
2189 * skip all blanks character found at that point in the input streams.
2190 * It pops up finished entities in the process if allowable at that point.
2191 *
2192 * Returns the number of space chars skipped
2193 */
2194
2195int
2196xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002197 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002198
2199 /*
2200 * It's Okay to use CUR/NEXT here since all the blanks are on
2201 * the ASCII range.
2202 */
Elliott Hughesecdab2a2022-02-23 14:33:50 -08002203 if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2204 (ctxt->instate == XML_PARSER_START)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002205 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002206 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002207 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002208 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002209 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002210 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002211 if (*cur == '\n') {
2212 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002213 } else {
2214 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002215 }
2216 cur++;
2217 res++;
2218 if (*cur == 0) {
2219 ctxt->input->cur = cur;
2220 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2221 cur = ctxt->input->cur;
2222 }
2223 }
2224 ctxt->input->cur = cur;
2225 } else {
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002226 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2227
2228 while (1) {
2229 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002230 NEXT;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002231 } else if (CUR == '%') {
2232 /*
2233 * Need to handle support of entities branching here
2234 */
2235 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2236 break;
2237 xmlParsePEReference(ctxt);
2238 } else if (CUR == 0) {
2239 if (ctxt->inputNr <= 1)
2240 break;
2241 xmlPopInput(ctxt);
2242 } else {
2243 break;
2244 }
Nick Wellnhofer872fea92017-06-19 00:24:12 +02002245
2246 /*
2247 * Also increase the counter when entering or exiting a PERef.
2248 * The spec says: "When a parameter-entity reference is recognized
2249 * in the DTD and included, its replacement text MUST be enlarged
2250 * by the attachment of one leading and one following space (#x20)
2251 * character."
2252 */
2253 res++;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002254 }
Daniel Veillard02141ea2001-04-30 11:46:40 +00002255 }
Owen Taylor3473f882001-02-23 17:55:21 +00002256 return(res);
2257}
2258
2259/************************************************************************
2260 * *
2261 * Commodity functions to handle entities *
2262 * *
2263 ************************************************************************/
2264
2265/**
2266 * xmlPopInput:
2267 * @ctxt: an XML parser context
2268 *
2269 * xmlPopInput: the current input pointed by ctxt->input came to an end
2270 * pop it and return the next char.
2271 *
2272 * Returns the current xmlChar in the parser context
2273 */
2274xmlChar
2275xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002276 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002277 if (xmlParserDebugEntities)
2278 xmlGenericError(xmlGenericErrorContext,
2279 "Popping input %d\n", ctxt->inputNr);
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02002280 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2281 (ctxt->instate != XML_PARSER_EOF))
2282 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2283 "Unfinished entity outside the DTD");
Owen Taylor3473f882001-02-23 17:55:21 +00002284 xmlFreeInputStream(inputPop(ctxt));
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002285 if (*ctxt->input->cur == 0)
2286 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00002287 return(CUR);
2288}
2289
2290/**
2291 * xmlPushInput:
2292 * @ctxt: an XML parser context
2293 * @input: an XML parser input fragment (entity, XML fragment ...).
2294 *
2295 * xmlPushInput: switch to a new input stream which is stacked on top
2296 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002297 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002298 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002299int
Owen Taylor3473f882001-02-23 17:55:21 +00002300xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002301 int ret;
2302 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002303
2304 if (xmlParserDebugEntities) {
2305 if ((ctxt->input != NULL) && (ctxt->input->filename))
2306 xmlGenericError(xmlGenericErrorContext,
2307 "%s(%d): ", ctxt->input->filename,
2308 ctxt->input->line);
2309 xmlGenericError(xmlGenericErrorContext,
2310 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2311 }
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02002312 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2313 (ctxt->inputNr > 1024)) {
2314 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2315 while (ctxt->inputNr > 1)
2316 xmlFreeInputStream(inputPop(ctxt));
2317 return(-1);
2318 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002319 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002320 if (ctxt->instate == XML_PARSER_EOF)
2321 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002322 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002323 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002324}
2325
2326/**
2327 * xmlParseCharRef:
2328 * @ctxt: an XML parser context
2329 *
2330 * parse Reference declarations
2331 *
2332 * [66] CharRef ::= '&#' [0-9]+ ';' |
2333 * '&#x' [0-9a-fA-F]+ ';'
2334 *
2335 * [ WFC: Legal Character ]
2336 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002337 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002338 *
2339 * Returns the value parsed (as an int), 0 in case of error
2340 */
2341int
2342xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002343 int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002344 int count = 0;
2345
Owen Taylor3473f882001-02-23 17:55:21 +00002346 /*
2347 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2348 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002349 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002350 (NXT(2) == 'x')) {
2351 SKIP(3);
2352 GROW;
2353 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002354 if (count++ > 20) {
2355 count = 0;
2356 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002357 if (ctxt->instate == XML_PARSER_EOF)
2358 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002359 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002360 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002361 val = val * 16 + (CUR - '0');
2362 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2363 val = val * 16 + (CUR - 'a') + 10;
2364 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2365 val = val * 16 + (CUR - 'A') + 10;
2366 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002367 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002368 val = 0;
2369 break;
2370 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002371 if (val > 0x110000)
2372 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002373
Owen Taylor3473f882001-02-23 17:55:21 +00002374 NEXT;
2375 count++;
2376 }
2377 if (RAW == ';') {
2378 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002379 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002380 ctxt->input->cur++;
2381 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002382 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002383 SKIP(2);
2384 GROW;
2385 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002386 if (count++ > 20) {
2387 count = 0;
2388 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002389 if (ctxt->instate == XML_PARSER_EOF)
2390 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002391 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002392 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002393 val = val * 10 + (CUR - '0');
2394 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002395 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002396 val = 0;
2397 break;
2398 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002399 if (val > 0x110000)
2400 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002401
Owen Taylor3473f882001-02-23 17:55:21 +00002402 NEXT;
2403 count++;
2404 }
2405 if (RAW == ';') {
2406 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002407 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002408 ctxt->input->cur++;
2409 }
2410 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002411 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002412 }
2413
2414 /*
2415 * [ WFC: Legal Character ]
2416 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002417 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002418 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002419 if (val >= 0x110000) {
2420 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2421 "xmlParseCharRef: character reference out of bounds\n",
2422 val);
2423 } else if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002424 return(val);
2425 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002426 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2427 "xmlParseCharRef: invalid xmlChar value %d\n",
2428 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002429 }
2430 return(0);
2431}
2432
2433/**
2434 * xmlParseStringCharRef:
2435 * @ctxt: an XML parser context
2436 * @str: a pointer to an index in the string
2437 *
2438 * parse Reference declarations, variant parsing from a string rather
2439 * than an an input flow.
2440 *
2441 * [66] CharRef ::= '&#' [0-9]+ ';' |
2442 * '&#x' [0-9a-fA-F]+ ';'
2443 *
2444 * [ WFC: Legal Character ]
2445 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002446 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002447 *
2448 * Returns the value parsed (as an int), 0 in case of error, str will be
2449 * updated to the current value of the index
2450 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002451static int
Owen Taylor3473f882001-02-23 17:55:21 +00002452xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2453 const xmlChar *ptr;
2454 xmlChar cur;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002455 int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002456
2457 if ((str == NULL) || (*str == NULL)) return(0);
2458 ptr = *str;
2459 cur = *ptr;
2460 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2461 ptr += 3;
2462 cur = *ptr;
2463 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002464 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002465 val = val * 16 + (cur - '0');
2466 else if ((cur >= 'a') && (cur <= 'f'))
2467 val = val * 16 + (cur - 'a') + 10;
2468 else if ((cur >= 'A') && (cur <= 'F'))
2469 val = val * 16 + (cur - 'A') + 10;
2470 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002471 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002472 val = 0;
2473 break;
2474 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002475 if (val > 0x110000)
2476 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002477
Owen Taylor3473f882001-02-23 17:55:21 +00002478 ptr++;
2479 cur = *ptr;
2480 }
2481 if (cur == ';')
2482 ptr++;
2483 } else if ((cur == '&') && (ptr[1] == '#')){
2484 ptr += 2;
2485 cur = *ptr;
2486 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002487 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002488 val = val * 10 + (cur - '0');
2489 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002490 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002491 val = 0;
2492 break;
2493 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002494 if (val > 0x110000)
2495 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002496
Owen Taylor3473f882001-02-23 17:55:21 +00002497 ptr++;
2498 cur = *ptr;
2499 }
2500 if (cur == ';')
2501 ptr++;
2502 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002503 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002504 return(0);
2505 }
2506 *str = ptr;
2507
2508 /*
2509 * [ WFC: Legal Character ]
2510 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002511 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002512 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002513 if (val >= 0x110000) {
2514 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2515 "xmlParseStringCharRef: character reference out of bounds\n",
2516 val);
2517 } else if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002518 return(val);
2519 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002520 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2521 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2522 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002523 }
2524 return(0);
2525}
2526
2527/**
2528 * xmlParserHandlePEReference:
2529 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002530 *
Owen Taylor3473f882001-02-23 17:55:21 +00002531 * [69] PEReference ::= '%' Name ';'
2532 *
2533 * [ WFC: No Recursion ]
2534 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002535 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002536 *
2537 * [ WFC: Entity Declared ]
2538 * In a document without any DTD, a document with only an internal DTD
2539 * subset which contains no parameter entity references, or a document
2540 * with "standalone='yes'", ... ... The declaration of a parameter
2541 * entity must precede any reference to it...
2542 *
2543 * [ VC: Entity Declared ]
2544 * In a document with an external subset or external parameter entities
2545 * with "standalone='no'", ... ... The declaration of a parameter entity
2546 * must precede any reference to it...
2547 *
2548 * [ WFC: In DTD ]
2549 * Parameter-entity references may only appear in the DTD.
2550 * NOTE: misleading but this is handled.
2551 *
2552 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002553 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002554 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002555 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002556 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002557 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002558 */
2559void
2560xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00002561 switch(ctxt->instate) {
2562 case XML_PARSER_CDATA_SECTION:
2563 return;
2564 case XML_PARSER_COMMENT:
2565 return;
2566 case XML_PARSER_START_TAG:
2567 return;
2568 case XML_PARSER_END_TAG:
2569 return;
2570 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002571 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002572 return;
2573 case XML_PARSER_PROLOG:
2574 case XML_PARSER_START:
2575 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002576 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002577 return;
2578 case XML_PARSER_ENTITY_DECL:
2579 case XML_PARSER_CONTENT:
2580 case XML_PARSER_ATTRIBUTE_VALUE:
2581 case XML_PARSER_PI:
2582 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002583 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002584 /* we just ignore it there */
2585 return;
2586 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002587 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002588 return;
2589 case XML_PARSER_ENTITY_VALUE:
2590 /*
2591 * NOTE: in the case of entity values, we don't do the
2592 * substitution here since we need the literal
2593 * entity value to be able to save the internal
2594 * subset of the document.
2595 * This will be handled by xmlStringDecodeEntities
2596 */
2597 return;
2598 case XML_PARSER_DTD:
2599 /*
2600 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2601 * In the internal DTD subset, parameter-entity references
2602 * can occur only where markup declarations can occur, not
2603 * within markup declarations.
2604 * In that case this is handled in xmlParseMarkupDecl
2605 */
2606 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2607 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002608 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002609 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002610 break;
2611 case XML_PARSER_IGNORE:
2612 return;
2613 }
2614
Nick Wellnhofer03904152017-06-05 21:16:00 +02002615 xmlParsePEReference(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00002616}
2617
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current one plus n.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * jumped to when the new size wraps around or when xmlRealloc() fails,
 * in which case buffer and buffer##_size are left untouched.
 * n is parenthesized so that an expression argument is added as a whole.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2632
2633/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002634 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002635 * @ctxt: the parser context
2636 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002637 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002638 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2639 * @end: an end marker xmlChar, 0 if none
2640 * @end2: an end marker xmlChar, 0 if none
2641 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002642 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002643 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002644 *
2645 * [67] Reference ::= EntityRef | CharRef
2646 *
2647 * [69] PEReference ::= '%' Name ';'
2648 *
2649 * Returns A newly allocated string with the substitution done. The caller
2650 * must deallocate it !
2651 */
2652xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002653xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2654 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002655 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002656 size_t buffer_size = 0;
2657 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002658
2659 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002660 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002661 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002662 xmlEntityPtr ent;
2663 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002664
Daniel Veillarda82b1822004-11-08 16:24:57 +00002665 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002666 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002667 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002668
Daniel Veillard0161e632008-08-28 15:36:32 +00002669 if (((ctxt->depth > 40) &&
2670 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2671 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002672 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002673 return(NULL);
2674 }
2675
2676 /*
2677 * allocate a translation buffer.
2678 */
2679 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002680 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002681 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002682
2683 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002684 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002685 * we are operating on already parsed values.
2686 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002687 if (str < last)
2688 c = CUR_SCHAR(str, l);
2689 else
2690 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002691 while ((c != 0) && (c != end) && /* non input consuming loop */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002692 (c != end2) && (c != end3) &&
2693 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002694
2695 if (c == 0) break;
2696 if ((c == '&') && (str[1] == '#')) {
2697 int val = xmlParseStringCharRef(ctxt, &str);
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002698 if (val == 0)
2699 goto int_error;
2700 COPY_BUF(0,buffer,nbchars,val);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002701 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002702 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002703 }
Owen Taylor3473f882001-02-23 17:55:21 +00002704 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2705 if (xmlParserDebugEntities)
2706 xmlGenericError(xmlGenericErrorContext,
2707 "String decoding Entity Reference: %.30s\n",
2708 str);
2709 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002710 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002711 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002712 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002713 if ((ent != NULL) &&
2714 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2715 if (ent->content != NULL) {
2716 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002717 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002718 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002719 }
Owen Taylor3473f882001-02-23 17:55:21 +00002720 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002721 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2722 "predefined entity has no content\n");
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002723 goto int_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002724 }
2725 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002726 ctxt->depth++;
2727 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2728 0, 0, 0);
2729 ctxt->depth--;
Elliott Hughes5cefca72021-05-06 13:23:15 -07002730 if (rep == NULL) {
2731 ent->content[0] = 0;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002732 goto int_error;
Elliott Hughes5cefca72021-05-06 13:23:15 -07002733 }
Daniel Veillard0161e632008-08-28 15:36:32 +00002734
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002735 current = rep;
2736 while (*current != 0) { /* non input consuming loop */
2737 buffer[nbchars++] = *current++;
2738 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2739 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2740 goto int_error;
2741 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2742 }
2743 }
2744 xmlFree(rep);
2745 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002746 } else if (ent != NULL) {
2747 int i = xmlStrlen(ent->name);
2748 const xmlChar *cur = ent->name;
2749
2750 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002751 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002752 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002753 }
2754 for (;i > 0;i--)
2755 buffer[nbchars++] = *cur++;
2756 buffer[nbchars++] = ';';
2757 }
2758 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2759 if (xmlParserDebugEntities)
2760 xmlGenericError(xmlGenericErrorContext,
2761 "String decoding PE Reference: %.30s\n", str);
2762 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002763 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002764 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002765 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002766 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002767 if (ent->content == NULL) {
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002768 /*
2769 * Note: external parsed entities will not be loaded,
2770 * it is not required for a non-validating parser to
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002771 * complete external PEReferences coming from the
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002772 * internal subset
2773 */
2774 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2775 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2776 (ctxt->validate != 0)) {
2777 xmlLoadEntityContent(ctxt, ent);
2778 } else {
2779 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2780 "not validating will not read content for PE entity %s\n",
2781 ent->name, NULL);
2782 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002783 }
Owen Taylor3473f882001-02-23 17:55:21 +00002784 ctxt->depth++;
2785 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2786 0, 0, 0);
2787 ctxt->depth--;
Elliott Hughes5cefca72021-05-06 13:23:15 -07002788 if (rep == NULL) {
2789 if (ent->content != NULL)
2790 ent->content[0] = 0;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002791 goto int_error;
Elliott Hughes5cefca72021-05-06 13:23:15 -07002792 }
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002793 current = rep;
2794 while (*current != 0) { /* non input consuming loop */
2795 buffer[nbchars++] = *current++;
2796 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2797 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2798 goto int_error;
2799 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2800 }
2801 }
2802 xmlFree(rep);
2803 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002804 }
2805 } else {
2806 COPY_BUF(l,buffer,nbchars,c);
2807 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002808 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2809 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002810 }
2811 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002812 if (str < last)
2813 c = CUR_SCHAR(str, l);
2814 else
2815 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002816 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002817 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002818 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002819
2820mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002821 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002822int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002823 if (rep != NULL)
2824 xmlFree(rep);
2825 if (buffer != NULL)
2826 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002827 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002828}
2829
Daniel Veillarde57ec792003-09-10 10:50:59 +00002830/**
2831 * xmlStringDecodeEntities:
2832 * @ctxt: the parser context
2833 * @str: the input string
2834 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2835 * @end: an end marker xmlChar, 0 if none
2836 * @end2: an end marker xmlChar, 0 if none
2837 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002838 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002839 * Takes a entity string content and process to do the adequate substitutions.
2840 *
2841 * [67] Reference ::= EntityRef | CharRef
2842 *
2843 * [69] PEReference ::= '%' Name ';'
2844 *
2845 * Returns A newly allocated string with the substitution done. The caller
2846 * must deallocate it !
2847 */
2848xmlChar *
2849xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2850 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002851 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002852 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2853 end, end2, end3));
2854}
Owen Taylor3473f882001-02-23 17:55:21 +00002855
2856/************************************************************************
2857 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002858 * Commodity functions, cleanup needed ? *
2859 * *
2860 ************************************************************************/
2861
2862/**
2863 * areBlanks:
2864 * @ctxt: an XML parser context
2865 * @str: a xmlChar *
2866 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002867 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002868 *
2869 * Is this a sequence of blank chars that one can ignore ?
2870 *
2871 * Returns 1 if ignorable 0 otherwise.
2872 */
2873
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002874static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2875 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002876 int i, ret;
2877 xmlNodePtr lastChild;
2878
Daniel Veillard05c13a22001-09-09 08:38:09 +00002879 /*
2880 * Don't spend time trying to differentiate them, the same callback is
2881 * used !
2882 */
2883 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002884 return(0);
2885
Owen Taylor3473f882001-02-23 17:55:21 +00002886 /*
2887 * Check for xml:space value.
2888 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002889 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2890 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002891 return(0);
2892
2893 /*
2894 * Check that the string is made of blanks
2895 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002896 if (blank_chars == 0) {
2897 for (i = 0;i < len;i++)
2898 if (!(IS_BLANK_CH(str[i]))) return(0);
2899 }
Owen Taylor3473f882001-02-23 17:55:21 +00002900
2901 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002902 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002903 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002904 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002905 if (ctxt->myDoc != NULL) {
2906 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2907 if (ret == 0) return(1);
2908 if (ret == 1) return(0);
2909 }
2910
2911 /*
2912 * Otherwise, heuristic :-\
2913 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002914 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002915 if ((ctxt->node->children == NULL) &&
2916 (RAW == '<') && (NXT(1) == '/')) return(0);
2917
2918 lastChild = xmlGetLastChild(ctxt->node);
2919 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002920 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2921 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002922 } else if (xmlNodeIsText(lastChild))
2923 return(0);
2924 else if ((ctxt->node->children != NULL) &&
2925 (xmlNodeIsText(ctxt->node->children)))
2926 return(0);
2927 return(1);
2928}
2929
Owen Taylor3473f882001-02-23 17:55:21 +00002930/************************************************************************
2931 * *
2932 * Extra stuff for namespace support *
2933 * Relates to http://www.w3.org/TR/WD-xml-names *
2934 * *
2935 ************************************************************************/
2936
2937/**
2938 * xmlSplitQName:
2939 * @ctxt: an XML parser context
2940 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002941 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002942 *
2943 * parse an UTF8 encoded XML qualified name string
2944 *
2945 * [NS 5] QName ::= (Prefix ':')? LocalPart
2946 *
2947 * [NS 6] Prefix ::= NCName
2948 *
2949 * [NS 7] LocalPart ::= NCName
2950 *
2951 * Returns the local part, and prefix is updated
2952 * to get the Prefix if any.
2953 */
2954
2955xmlChar *
2956xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2957 xmlChar buf[XML_MAX_NAMELEN + 5];
2958 xmlChar *buffer = NULL;
2959 int len = 0;
2960 int max = XML_MAX_NAMELEN;
2961 xmlChar *ret = NULL;
2962 const xmlChar *cur = name;
2963 int c;
2964
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002965 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002966 *prefix = NULL;
2967
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002968 if (cur == NULL) return(NULL);
2969
Owen Taylor3473f882001-02-23 17:55:21 +00002970#ifndef XML_XML_NAMESPACE
2971 /* xml: prefix is not really a namespace */
2972 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2973 (cur[2] == 'l') && (cur[3] == ':'))
2974 return(xmlStrdup(name));
2975#endif
2976
Daniel Veillard597bc482003-07-24 16:08:28 +00002977 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002978 if (cur[0] == ':')
2979 return(xmlStrdup(name));
2980
2981 c = *cur++;
2982 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2983 buf[len++] = c;
2984 c = *cur++;
2985 }
2986 if (len >= max) {
2987 /*
2988 * Okay someone managed to make a huge name, so he's ready to pay
2989 * for the processing speed.
2990 */
2991 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002992
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002993 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002994 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002995 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002996 return(NULL);
2997 }
2998 memcpy(buffer, buf, len);
2999 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3000 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003001 xmlChar *tmp;
3002
Owen Taylor3473f882001-02-23 17:55:21 +00003003 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003004 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003005 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003006 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00003007 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003008 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003009 return(NULL);
3010 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003011 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003012 }
3013 buffer[len++] = c;
3014 c = *cur++;
3015 }
3016 buffer[len] = 0;
3017 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003018
Daniel Veillard597bc482003-07-24 16:08:28 +00003019 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003020 if (buffer != NULL)
3021 xmlFree(buffer);
3022 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003023 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003024 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003025
Owen Taylor3473f882001-02-23 17:55:21 +00003026 if (buffer == NULL)
3027 ret = xmlStrndup(buf, len);
3028 else {
3029 ret = buffer;
3030 buffer = NULL;
3031 max = XML_MAX_NAMELEN;
3032 }
3033
3034
3035 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003036 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003037 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003038 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003039 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003040 }
Owen Taylor3473f882001-02-23 17:55:21 +00003041 len = 0;
3042
Daniel Veillardbb284f42002-10-16 18:02:47 +00003043 /*
3044 * Check that the first character is proper to start
3045 * a new name
3046 */
3047 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3048 ((c >= 0x41) && (c <= 0x5A)) ||
3049 (c == '_') || (c == ':'))) {
3050 int l;
3051 int first = CUR_SCHAR(cur, l);
3052
3053 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003054 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003055 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003056 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003057 }
3058 }
3059 cur++;
3060
Owen Taylor3473f882001-02-23 17:55:21 +00003061 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3062 buf[len++] = c;
3063 c = *cur++;
3064 }
3065 if (len >= max) {
3066 /*
3067 * Okay someone managed to make a huge name, so he's ready to pay
3068 * for the processing speed.
3069 */
3070 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003071
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003072 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003073 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003074 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003075 return(NULL);
3076 }
3077 memcpy(buffer, buf, len);
3078 while (c != 0) { /* tested bigname2.xml */
3079 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003080 xmlChar *tmp;
3081
Owen Taylor3473f882001-02-23 17:55:21 +00003082 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003083 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003084 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003085 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003086 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003087 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003088 return(NULL);
3089 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003090 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003091 }
3092 buffer[len++] = c;
3093 c = *cur++;
3094 }
3095 buffer[len] = 0;
3096 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003097
Owen Taylor3473f882001-02-23 17:55:21 +00003098 if (buffer == NULL)
3099 ret = xmlStrndup(buf, len);
3100 else {
3101 ret = buffer;
3102 }
3103 }
3104
3105 return(ret);
3106}
3107
3108/************************************************************************
3109 * *
3110 * The parser itself *
3111 * Relates to http://www.w3.org/TR/REC-xml *
3112 * *
3113 ************************************************************************/
3114
Daniel Veillard34e3f642008-07-29 09:02:27 +00003115/************************************************************************
3116 * *
3117 * Routines to parse Name, NCName and NmToken *
3118 * *
3119 ************************************************************************/
/*
 * Call counters for the name parsing routines, only compiled in when
 * DEBUG is defined. Each of nbParseName, nbParseNCName,
 * nbParseNCNameComplex, nbParseNameComplex and nbParseStringName is
 * incremented by the parsing function it is named after.
 * NOTE(review): nbParseNmToken is presumably incremented by the NmToken
 * parser, which is not part of this section - confirm.
 */
#ifdef DEBUG
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3128
Daniel Veillard34e3f642008-07-29 09:02:27 +00003129/*
3130 * The two following functions are related to the change of accepted
3131 * characters for Name and NmToken in the Revision 5 of XML-1.0
3132 * They correspond to the modified production [4] and the new production [4a]
3133 * changes in that revision. Also note that the macros used for the
3134 * productions Letter, Digit, CombiningChar and Extender are not needed
3135 * anymore.
3136 * We still keep compatibility to pre-revision5 parsing semantic if the
3137 * new XML_PARSE_OLD10 option is given to the parser.
3138 */
3139static int
3140xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3141 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3142 /*
3143 * Use the new checks of production [4] [4a] amd [5] of the
3144 * Update 5 of XML-1.0
3145 */
3146 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3147 (((c >= 'a') && (c <= 'z')) ||
3148 ((c >= 'A') && (c <= 'Z')) ||
3149 (c == '_') || (c == ':') ||
3150 ((c >= 0xC0) && (c <= 0xD6)) ||
3151 ((c >= 0xD8) && (c <= 0xF6)) ||
3152 ((c >= 0xF8) && (c <= 0x2FF)) ||
3153 ((c >= 0x370) && (c <= 0x37D)) ||
3154 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3155 ((c >= 0x200C) && (c <= 0x200D)) ||
3156 ((c >= 0x2070) && (c <= 0x218F)) ||
3157 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3158 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3159 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3160 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3161 ((c >= 0x10000) && (c <= 0xEFFFF))))
3162 return(1);
3163 } else {
3164 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3165 return(1);
3166 }
3167 return(0);
3168}
3169
3170static int
3171xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3172 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3173 /*
3174 * Use the new checks of production [4] [4a] amd [5] of the
3175 * Update 5 of XML-1.0
3176 */
3177 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3178 (((c >= 'a') && (c <= 'z')) ||
3179 ((c >= 'A') && (c <= 'Z')) ||
3180 ((c >= '0') && (c <= '9')) || /* !start */
3181 (c == '_') || (c == ':') ||
3182 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3183 ((c >= 0xC0) && (c <= 0xD6)) ||
3184 ((c >= 0xD8) && (c <= 0xF6)) ||
3185 ((c >= 0xF8) && (c <= 0x2FF)) ||
3186 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3187 ((c >= 0x370) && (c <= 0x37D)) ||
3188 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3189 ((c >= 0x200C) && (c <= 0x200D)) ||
3190 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3191 ((c >= 0x2070) && (c <= 0x218F)) ||
3192 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3193 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3194 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3195 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3196 ((c >= 0x10000) && (c <= 0xEFFFF))))
3197 return(1);
3198 } else {
3199 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3200 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003201 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003202 (IS_COMBINING(c)) ||
3203 (IS_EXTENDER(c)))
3204 return(1);
3205 }
3206 return(0);
3207}
3208
Daniel Veillarde57ec792003-09-10 10:50:59 +00003209static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003210 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003211
Daniel Veillard34e3f642008-07-29 09:02:27 +00003212static const xmlChar *
3213xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3214 int len = 0, l;
3215 int c;
3216 int count = 0;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003217 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3218 XML_MAX_TEXT_LENGTH :
3219 XML_MAX_NAME_LENGTH;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003220
Daniel Veillardc6561462009-03-25 10:22:31 +00003221#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003222 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003223#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003224
3225 /*
3226 * Handler for more complex cases
3227 */
3228 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003229 if (ctxt->instate == XML_PARSER_EOF)
3230 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003231 c = CUR_CHAR(l);
3232 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3233 /*
3234 * Use the new checks of production [4] [4a] amd [5] of the
3235 * Update 5 of XML-1.0
3236 */
3237 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3238 (!(((c >= 'a') && (c <= 'z')) ||
3239 ((c >= 'A') && (c <= 'Z')) ||
3240 (c == '_') || (c == ':') ||
3241 ((c >= 0xC0) && (c <= 0xD6)) ||
3242 ((c >= 0xD8) && (c <= 0xF6)) ||
3243 ((c >= 0xF8) && (c <= 0x2FF)) ||
3244 ((c >= 0x370) && (c <= 0x37D)) ||
3245 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3246 ((c >= 0x200C) && (c <= 0x200D)) ||
3247 ((c >= 0x2070) && (c <= 0x218F)) ||
3248 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3249 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3250 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3251 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3252 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3253 return(NULL);
3254 }
3255 len += l;
3256 NEXTL(l);
3257 c = CUR_CHAR(l);
3258 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3259 (((c >= 'a') && (c <= 'z')) ||
3260 ((c >= 'A') && (c <= 'Z')) ||
3261 ((c >= '0') && (c <= '9')) || /* !start */
3262 (c == '_') || (c == ':') ||
3263 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3264 ((c >= 0xC0) && (c <= 0xD6)) ||
3265 ((c >= 0xD8) && (c <= 0xF6)) ||
3266 ((c >= 0xF8) && (c <= 0x2FF)) ||
3267 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3268 ((c >= 0x370) && (c <= 0x37D)) ||
3269 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3270 ((c >= 0x200C) && (c <= 0x200D)) ||
3271 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3272 ((c >= 0x2070) && (c <= 0x218F)) ||
3273 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3274 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3275 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3276 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3277 ((c >= 0x10000) && (c <= 0xEFFFF))
3278 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003279 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003280 count = 0;
3281 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003282 if (ctxt->instate == XML_PARSER_EOF)
3283 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003284 }
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003285 if (len <= INT_MAX - l)
3286 len += l;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003287 NEXTL(l);
3288 c = CUR_CHAR(l);
3289 }
3290 } else {
3291 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3292 (!IS_LETTER(c) && (c != '_') &&
3293 (c != ':'))) {
3294 return(NULL);
3295 }
3296 len += l;
3297 NEXTL(l);
3298 c = CUR_CHAR(l);
3299
3300 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3301 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3302 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003303 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003304 (IS_COMBINING(c)) ||
3305 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003306 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003307 count = 0;
3308 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003309 if (ctxt->instate == XML_PARSER_EOF)
3310 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003311 }
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003312 if (len <= INT_MAX - l)
3313 len += l;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003314 NEXTL(l);
3315 c = CUR_CHAR(l);
3316 }
3317 }
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003318 if (len > maxLength) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003319 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3320 return(NULL);
3321 }
Nick Wellnhofere2663052017-06-05 15:37:17 +02003322 if (ctxt->input->cur - ctxt->input->base < len) {
3323 /*
3324 * There were a couple of bugs where PERefs lead to to a change
3325 * of the buffer. Check the buffer size to avoid passing an invalid
3326 * pointer to xmlDictLookup.
3327 */
3328 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3329 "unexpected change of input buffer");
3330 return (NULL);
3331 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003332 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3333 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3334 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3335}
3336
Owen Taylor3473f882001-02-23 17:55:21 +00003337/**
3338 * xmlParseName:
3339 * @ctxt: an XML parser context
3340 *
3341 * parse an XML name.
3342 *
3343 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3344 * CombiningChar | Extender
3345 *
3346 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3347 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003348 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003349 *
3350 * Returns the Name parsed or NULL
3351 */
3352
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003353const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003354xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003355 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003356 const xmlChar *ret;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003357 size_t count = 0;
3358 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3359 XML_MAX_TEXT_LENGTH :
3360 XML_MAX_NAME_LENGTH;
Owen Taylor3473f882001-02-23 17:55:21 +00003361
3362 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003363
Daniel Veillardc6561462009-03-25 10:22:31 +00003364#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003365 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003366#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003367
Daniel Veillard48b2f892001-02-25 16:11:03 +00003368 /*
3369 * Accelerator for simple ASCII names
3370 */
3371 in = ctxt->input->cur;
3372 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373 ((*in >= 0x41) && (*in <= 0x5A)) ||
3374 (*in == '_') || (*in == ':')) {
3375 in++;
3376 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377 ((*in >= 0x41) && (*in <= 0x5A)) ||
3378 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003379 (*in == '_') || (*in == '-') ||
3380 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003381 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003382 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003383 count = in - ctxt->input->cur;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003384 if (count > maxLength) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003385 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3386 return(NULL);
3387 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003388 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003389 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003390 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003391 if (ret == NULL)
3392 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003393 return(ret);
3394 }
3395 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003396 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003397 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003398}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003399
Daniel Veillard34e3f642008-07-29 09:02:27 +00003400static const xmlChar *
3401xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3402 int len = 0, l;
3403 int c;
3404 int count = 0;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003405 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3406 XML_MAX_TEXT_LENGTH :
3407 XML_MAX_NAME_LENGTH;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003408 size_t startPosition = 0;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003409
Daniel Veillardc6561462009-03-25 10:22:31 +00003410#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003411 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003412#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003413
3414 /*
3415 * Handler for more complex cases
3416 */
3417 GROW;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003418 startPosition = CUR_PTR - BASE_PTR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003419 c = CUR_CHAR(l);
3420 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3421 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3422 return(NULL);
3423 }
3424
3425 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3426 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003427 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003428 count = 0;
3429 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003430 if (ctxt->instate == XML_PARSER_EOF)
3431 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003432 }
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003433 if (len <= INT_MAX - l)
3434 len += l;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003435 NEXTL(l);
3436 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003437 if (c == 0) {
3438 count = 0;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003439 /*
3440 * when shrinking to extend the buffer we really need to preserve
3441 * the part of the name we already parsed. Hence rolling back
Haibo Huangcfd91dc2020-07-30 23:01:33 -07003442 * by current length.
Daniel Veillard51f02b02015-09-15 16:50:32 +08003443 */
3444 ctxt->input->cur -= l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003445 GROW;
3446 if (ctxt->instate == XML_PARSER_EOF)
3447 return(NULL);
Nick Wellnhofer132af1a2018-01-08 18:48:01 +01003448 ctxt->input->cur += l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003449 c = CUR_CHAR(l);
3450 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003451 }
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003452 if (len > maxLength) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003453 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3454 return(NULL);
3455 }
Pranjal Jumde45752d22016-03-03 11:50:34 -08003456 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003457}
3458
3459/**
3460 * xmlParseNCName:
3461 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003462 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003463 *
3464 * parse an XML name.
3465 *
3466 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3467 * CombiningChar | Extender
3468 *
3469 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3470 *
3471 * Returns the Name parsed or NULL
3472 */
3473
3474static const xmlChar *
3475xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003476 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003477 const xmlChar *ret;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003478 size_t count = 0;
3479 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3480 XML_MAX_TEXT_LENGTH :
3481 XML_MAX_NAME_LENGTH;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003482
Daniel Veillardc6561462009-03-25 10:22:31 +00003483#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003484 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003485#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003486
3487 /*
3488 * Accelerator for simple ASCII names
3489 */
3490 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003491 e = ctxt->input->end;
3492 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3493 ((*in >= 0x41) && (*in <= 0x5A)) ||
3494 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003495 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003496 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3497 ((*in >= 0x41) && (*in <= 0x5A)) ||
3498 ((*in >= 0x30) && (*in <= 0x39)) ||
3499 (*in == '_') || (*in == '-') ||
3500 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003501 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003502 if (in >= e)
3503 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003504 if ((*in > 0) && (*in < 0x80)) {
3505 count = in - ctxt->input->cur;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003506 if (count > maxLength) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003507 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3508 return(NULL);
3509 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003510 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3511 ctxt->input->cur = in;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003512 ctxt->input->col += count;
3513 if (ret == NULL) {
3514 xmlErrMemory(ctxt, NULL);
3515 }
3516 return(ret);
3517 }
3518 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003519complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003520 return(xmlParseNCNameComplex(ctxt));
3521}
3522
Daniel Veillard46de64e2002-05-29 08:21:33 +00003523/**
3524 * xmlParseNameAndCompare:
3525 * @ctxt: an XML parser context
3526 *
3527 * parse an XML name and compares for match
3528 * (specialized for endtag parsing)
3529 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003530 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3531 * and the name for mismatch
3532 */
3533
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003534static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003535xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003536 register const xmlChar *cmp = other;
3537 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003538 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003539
3540 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003541 if (ctxt->instate == XML_PARSER_EOF)
3542 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003543
Daniel Veillard46de64e2002-05-29 08:21:33 +00003544 in = ctxt->input->cur;
3545 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003546 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003547 ++cmp;
3548 }
William M. Brack76e95df2003-10-18 16:20:14 +00003549 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003550 /* success */
Haibo Huangf0a546b2020-09-01 20:28:19 -07003551 ctxt->input->col += in - ctxt->input->cur;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003552 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003553 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003554 }
3555 /* failure (or end of input buffer), check with full function */
3556 ret = xmlParseName (ctxt);
Jan Pokornýbb654fe2016-04-13 16:56:07 +02003557 /* strings coming from the dictionary direct compare possible */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003558 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003559 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003560 }
3561 return ret;
3562}
3563
Owen Taylor3473f882001-02-23 17:55:21 +00003564/**
3565 * xmlParseStringName:
3566 * @ctxt: an XML parser context
3567 * @str: a pointer to the string pointer (IN/OUT)
3568 *
3569 * parse an XML name.
3570 *
3571 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3572 * CombiningChar | Extender
3573 *
3574 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3575 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003576 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003577 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003578 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003579 * is updated to the current location in the string.
3580 */
3581
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003582static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003583xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3584 xmlChar buf[XML_MAX_NAMELEN + 5];
3585 const xmlChar *cur = *str;
3586 int len = 0, l;
3587 int c;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003588 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3589 XML_MAX_TEXT_LENGTH :
3590 XML_MAX_NAME_LENGTH;
Owen Taylor3473f882001-02-23 17:55:21 +00003591
Daniel Veillardc6561462009-03-25 10:22:31 +00003592#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003593 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003594#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003595
Owen Taylor3473f882001-02-23 17:55:21 +00003596 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003597 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003598 return(NULL);
3599 }
3600
Daniel Veillard34e3f642008-07-29 09:02:27 +00003601 COPY_BUF(l,buf,len,c);
3602 cur += l;
3603 c = CUR_SCHAR(cur, l);
3604 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003605 COPY_BUF(l,buf,len,c);
3606 cur += l;
3607 c = CUR_SCHAR(cur, l);
3608 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3609 /*
3610 * Okay someone managed to make a huge name, so he's ready to pay
3611 * for the processing speed.
3612 */
3613 xmlChar *buffer;
3614 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003615
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003616 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003617 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003618 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003619 return(NULL);
3620 }
3621 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003622 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003623 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003624 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003625
Owen Taylor3473f882001-02-23 17:55:21 +00003626 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003627 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003628 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003629 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003630 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003631 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003632 return(NULL);
3633 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003634 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003635 }
3636 COPY_BUF(l,buffer,len,c);
3637 cur += l;
3638 c = CUR_SCHAR(cur, l);
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003639 if (len > maxLength) {
3640 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3641 xmlFree(buffer);
3642 return(NULL);
3643 }
Owen Taylor3473f882001-02-23 17:55:21 +00003644 }
3645 buffer[len] = 0;
3646 *str = cur;
3647 return(buffer);
3648 }
3649 }
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003650 if (len > maxLength) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003651 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3652 return(NULL);
3653 }
Owen Taylor3473f882001-02-23 17:55:21 +00003654 *str = cur;
3655 return(xmlStrndup(buf, len));
3656}
3657
3658/**
3659 * xmlParseNmtoken:
3660 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003661 *
Owen Taylor3473f882001-02-23 17:55:21 +00003662 * parse an XML Nmtoken.
3663 *
3664 * [7] Nmtoken ::= (NameChar)+
3665 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003666 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003667 *
3668 * Returns the Nmtoken parsed or NULL
3669 */
3670
3671xmlChar *
3672xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3673 xmlChar buf[XML_MAX_NAMELEN + 5];
3674 int len = 0, l;
3675 int c;
3676 int count = 0;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003677 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3678 XML_MAX_TEXT_LENGTH :
3679 XML_MAX_NAME_LENGTH;
Owen Taylor3473f882001-02-23 17:55:21 +00003680
Daniel Veillardc6561462009-03-25 10:22:31 +00003681#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003682 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003683#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003684
Owen Taylor3473f882001-02-23 17:55:21 +00003685 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003686 if (ctxt->instate == XML_PARSER_EOF)
3687 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003688 c = CUR_CHAR(l);
3689
Daniel Veillard34e3f642008-07-29 09:02:27 +00003690 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003691 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003692 count = 0;
3693 GROW;
3694 }
3695 COPY_BUF(l,buf,len,c);
3696 NEXTL(l);
3697 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003698 if (c == 0) {
3699 count = 0;
3700 GROW;
3701 if (ctxt->instate == XML_PARSER_EOF)
3702 return(NULL);
3703 c = CUR_CHAR(l);
3704 }
Owen Taylor3473f882001-02-23 17:55:21 +00003705 if (len >= XML_MAX_NAMELEN) {
3706 /*
3707 * Okay someone managed to make a huge token, so he's ready to pay
3708 * for the processing speed.
3709 */
3710 xmlChar *buffer;
3711 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003712
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003713 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003714 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003715 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003716 return(NULL);
3717 }
3718 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003719 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003720 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003721 count = 0;
3722 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003723 if (ctxt->instate == XML_PARSER_EOF) {
3724 xmlFree(buffer);
3725 return(NULL);
3726 }
Owen Taylor3473f882001-02-23 17:55:21 +00003727 }
3728 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003729 xmlChar *tmp;
3730
Owen Taylor3473f882001-02-23 17:55:21 +00003731 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003732 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003733 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003734 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003735 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003736 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003737 return(NULL);
3738 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003739 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003740 }
3741 COPY_BUF(l,buffer,len,c);
3742 NEXTL(l);
3743 c = CUR_CHAR(l);
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003744 if (len > maxLength) {
3745 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3746 xmlFree(buffer);
3747 return(NULL);
3748 }
Owen Taylor3473f882001-02-23 17:55:21 +00003749 }
3750 buffer[len] = 0;
3751 return(buffer);
3752 }
3753 }
3754 if (len == 0)
3755 return(NULL);
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003756 if (len > maxLength) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003757 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3758 return(NULL);
3759 }
Owen Taylor3473f882001-02-23 17:55:21 +00003760 return(xmlStrndup(buf, len));
3761}
3762
3763/**
3764 * xmlParseEntityValue:
3765 * @ctxt: an XML parser context
3766 * @orig: if non-NULL store a copy of the original entity value
3767 *
3768 * parse a value for ENTITY declarations
3769 *
3770 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3771 * "'" ([^%&'] | PEReference | Reference)* "'"
3772 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003773 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003774 */
3775
3776xmlChar *
3777xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3778 xmlChar *buf = NULL;
3779 int len = 0;
3780 int size = XML_PARSER_BUFFER_SIZE;
3781 int c, l;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003782 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3783 XML_MAX_HUGE_LENGTH :
3784 XML_MAX_TEXT_LENGTH;
Owen Taylor3473f882001-02-23 17:55:21 +00003785 xmlChar stop;
3786 xmlChar *ret = NULL;
3787 const xmlChar *cur = NULL;
3788 xmlParserInputPtr input;
3789
3790 if (RAW == '"') stop = '"';
3791 else if (RAW == '\'') stop = '\'';
3792 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003793 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003794 return(NULL);
3795 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003796 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003797 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003798 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003799 return(NULL);
3800 }
3801
3802 /*
3803 * The content of the entity definition is copied in a buffer.
3804 */
3805
3806 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3807 input = ctxt->input;
3808 GROW;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003809 if (ctxt->instate == XML_PARSER_EOF)
3810 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003811 NEXT;
3812 c = CUR_CHAR(l);
3813 /*
3814 * NOTE: 4.4.5 Included in Literal
3815 * When a parameter entity reference appears in a literal entity
3816 * value, ... a single or double quote character in the replacement
3817 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003818 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003819 * In practice it means we stop the loop only when back at parsing
3820 * the initial entity and the quote is found
3821 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003822 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3823 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003824 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003825 xmlChar *tmp;
3826
Owen Taylor3473f882001-02-23 17:55:21 +00003827 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003828 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3829 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003830 xmlErrMemory(ctxt, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003831 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003832 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003833 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003834 }
3835 COPY_BUF(l,buf,len,c);
3836 NEXTL(l);
Owen Taylor3473f882001-02-23 17:55:21 +00003837
3838 GROW;
3839 c = CUR_CHAR(l);
3840 if (c == 0) {
3841 GROW;
3842 c = CUR_CHAR(l);
3843 }
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003844
3845 if (len > maxLength) {
3846 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3847 "entity value too long\n");
3848 goto error;
3849 }
Owen Taylor3473f882001-02-23 17:55:21 +00003850 }
3851 buf[len] = 0;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003852 if (ctxt->instate == XML_PARSER_EOF)
3853 goto error;
3854 if (c != stop) {
3855 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3856 goto error;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003857 }
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003858 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00003859
3860 /*
3861 * Raise problem w.r.t. '&' and '%' being used in non-entities
3862 * reference constructs. Note Charref will be handled in
3863 * xmlStringDecodeEntities()
3864 */
3865 cur = buf;
3866 while (*cur != 0) { /* non input consuming */
3867 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3868 xmlChar *name;
3869 xmlChar tmp = *cur;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003870 int nameOk = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003871
3872 cur++;
3873 name = xmlParseStringName(ctxt, &cur);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003874 if (name != NULL) {
3875 nameOk = 1;
3876 xmlFree(name);
3877 }
3878 if ((nameOk == 0) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003879 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003880 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003881 tmp);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003882 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003883 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003884 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3885 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003886 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003887 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003888 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003889 if (*cur == 0)
3890 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003891 }
3892 cur++;
3893 }
3894
3895 /*
3896 * Then PEReference entities are substituted.
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003897 *
3898 * NOTE: 4.4.7 Bypassed
3899 * When a general entity reference appears in the EntityValue in
3900 * an entity declaration, it is bypassed and left as is.
3901 * so XML_SUBSTITUTE_REF is not set here.
Owen Taylor3473f882001-02-23 17:55:21 +00003902 */
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003903 ++ctxt->depth;
3904 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3905 0, 0, 0);
3906 --ctxt->depth;
3907 if (orig != NULL) {
3908 *orig = buf;
3909 buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003910 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003911
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003912error:
3913 if (buf != NULL)
3914 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003915 return(ret);
3916}
3917
3918/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003919 * xmlParseAttValueComplex:
3920 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003921 * @len: the resulting attribute len
Haibo Huangcfd91dc2020-07-30 23:01:33 -07003922 * @normalize: whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003923 *
3924 * parse a value for an attribute, this is the fallback function
3925 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003926 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003927 *
3928 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3929 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003930static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003931xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003932 xmlChar limit = 0;
3933 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003934 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003935 size_t len = 0;
3936 size_t buf_size = 0;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02003937 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3938 XML_MAX_HUGE_LENGTH :
3939 XML_MAX_TEXT_LENGTH;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003940 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003941 xmlChar *current = NULL;
3942 xmlEntityPtr ent;
3943
Owen Taylor3473f882001-02-23 17:55:21 +00003944 if (NXT(0) == '"') {
3945 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3946 limit = '"';
3947 NEXT;
3948 } else if (NXT(0) == '\'') {
3949 limit = '\'';
3950 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3951 NEXT;
3952 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003953 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003954 return(NULL);
3955 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003956
Owen Taylor3473f882001-02-23 17:55:21 +00003957 /*
3958 * allocate a translation buffer.
3959 */
3960 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003961 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003962 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003963
3964 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003965 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003966 */
3967 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003968 while (((NXT(0) != limit) && /* checked */
3969 (IS_CHAR(c)) && (c != '<')) &&
3970 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillardfdc91562002-07-01 21:52:03 +00003971 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003972 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003973 if (NXT(1) == '#') {
3974 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003975
Owen Taylor3473f882001-02-23 17:55:21 +00003976 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003977 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003978 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003979 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003980 }
3981 buf[len++] = '&';
3982 } else {
3983 /*
3984 * The reparsing will be done in xmlStringGetNodeList()
3985 * called by the attribute() function in SAX.c
3986 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003987 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003988 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003989 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003990 buf[len++] = '&';
3991 buf[len++] = '#';
3992 buf[len++] = '3';
3993 buf[len++] = '8';
3994 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003995 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003996 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003997 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003998 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003999 }
Owen Taylor3473f882001-02-23 17:55:21 +00004000 len += xmlCopyChar(0, &buf[len], val);
4001 }
4002 } else {
4003 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00004004 ctxt->nbentities++;
4005 if (ent != NULL)
4006 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004007 if ((ent != NULL) &&
4008 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004009 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004010 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004011 }
4012 if ((ctxt->replaceEntities == 0) &&
4013 (ent->content[0] == '&')) {
4014 buf[len++] = '&';
4015 buf[len++] = '#';
4016 buf[len++] = '3';
4017 buf[len++] = '8';
4018 buf[len++] = ';';
4019 } else {
4020 buf[len++] = ent->content[0];
4021 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004022 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004023 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004024 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
Peter Simons8f30bdf2016-04-15 11:56:55 +02004025 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004026 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004027 XML_SUBSTITUTE_REF,
4028 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004029 --ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004030 if (rep != NULL) {
4031 current = rep;
4032 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004033 if ((*current == 0xD) || (*current == 0xA) ||
4034 (*current == 0x9)) {
4035 buf[len++] = 0x20;
4036 current++;
4037 } else
4038 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004039 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004040 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004041 }
4042 }
4043 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004044 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004045 }
4046 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004047 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004048 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004049 }
Owen Taylor3473f882001-02-23 17:55:21 +00004050 if (ent->content != NULL)
4051 buf[len++] = ent->content[0];
4052 }
4053 } else if (ent != NULL) {
4054 int i = xmlStrlen(ent->name);
4055 const xmlChar *cur = ent->name;
4056
4057 /*
4058 * This may look absurd but is needed to detect
4059 * entities problems
4060 */
4061 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004062 (ent->content != NULL) && (ent->checked == 0)) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004063 unsigned long oldnbent = ctxt->nbentities, diff;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004064
Peter Simons8f30bdf2016-04-15 11:56:55 +02004065 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004066 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004067 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004068 --ctxt->depth;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004069
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004070 diff = ctxt->nbentities - oldnbent + 1;
4071 if (diff > INT_MAX / 2)
4072 diff = INT_MAX / 2;
4073 ent->checked = diff * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004074 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004075 if (xmlStrchr(rep, '<'))
4076 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004077 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004078 rep = NULL;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02004079 } else {
4080 ent->content[0] = 0;
4081 }
Owen Taylor3473f882001-02-23 17:55:21 +00004082 }
4083
4084 /*
4085 * Just output the reference
4086 */
4087 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004088 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004089 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004090 }
4091 for (;i > 0;i--)
4092 buf[len++] = *cur++;
4093 buf[len++] = ';';
4094 }
4095 }
4096 } else {
4097 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004098 if ((len != 0) || (!normalize)) {
4099 if ((!normalize) || (!in_space)) {
4100 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004101 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004102 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004103 }
4104 }
4105 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004106 }
4107 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004108 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004109 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004110 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004111 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004112 }
4113 }
4114 NEXTL(l);
4115 }
4116 GROW;
4117 c = CUR_CHAR(l);
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02004118 if (len > maxLength) {
4119 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4120 "AttValue length too long\n");
4121 goto mem_error;
4122 }
Owen Taylor3473f882001-02-23 17:55:21 +00004123 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004124 if (ctxt->instate == XML_PARSER_EOF)
4125 goto error;
4126
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004127 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004128 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004129 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004130 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004131 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004132 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004133 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004134 if ((c != 0) && (!IS_CHAR(c))) {
4135 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4136 "invalid character in attribute value\n");
4137 } else {
4138 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4139 "AttValue: ' expected\n");
4140 }
Owen Taylor3473f882001-02-23 17:55:21 +00004141 } else
4142 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004143
Daniel Veillard459eeb92012-07-17 16:19:17 +08004144 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004145 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004146
4147mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004148 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004149error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004150 if (buf != NULL)
4151 xmlFree(buf);
4152 if (rep != NULL)
4153 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004154 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004155}
4156
4157/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004158 * xmlParseAttValue:
4159 * @ctxt: an XML parser context
4160 *
4161 * parse a value for an attribute
4162 * Note: the parser won't do substitution of entities here, this
4163 * will be handled later in xmlStringGetNodeList
4164 *
4165 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4166 * "'" ([^<&'] | Reference)* "'"
4167 *
4168 * 3.3.3 Attribute-Value Normalization:
4169 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004170 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004171 * - a character reference is processed by appending the referenced
4172 * character to the attribute value
4173 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004174 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004175 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4176 * appending #x20 to the normalized value, except that only a single
4177 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004178 * parsed entity or the literal entity value of an internal parsed entity
4179 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004180 * If the declared value is not CDATA, then the XML processor must further
4181 * process the normalized attribute value by discarding any leading and
4182 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004183 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004184 * All attributes for which no declaration has been read should be treated
4185 * by a non-validating parser as if declared CDATA.
4186 *
4187 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4188 */
4189
4190
4191xmlChar *
4192xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004193 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004194 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004195}
4196
4197/**
Owen Taylor3473f882001-02-23 17:55:21 +00004198 * xmlParseSystemLiteral:
4199 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004200 *
Owen Taylor3473f882001-02-23 17:55:21 +00004201 * parse an XML Literal
4202 *
4203 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4204 *
4205 * Returns the SystemLiteral parsed or NULL
4206 */
4207
4208xmlChar *
4209xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4210 xmlChar *buf = NULL;
4211 int len = 0;
4212 int size = XML_PARSER_BUFFER_SIZE;
4213 int cur, l;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02004214 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4215 XML_MAX_TEXT_LENGTH :
4216 XML_MAX_NAME_LENGTH;
Owen Taylor3473f882001-02-23 17:55:21 +00004217 xmlChar stop;
4218 int state = ctxt->instate;
4219 int count = 0;
4220
4221 SHRINK;
4222 if (RAW == '"') {
4223 NEXT;
4224 stop = '"';
4225 } else if (RAW == '\'') {
4226 NEXT;
4227 stop = '\'';
4228 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004229 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004230 return(NULL);
4231 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004232
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004233 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004234 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004235 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004236 return(NULL);
4237 }
4238 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4239 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004240 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004241 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004242 xmlChar *tmp;
4243
Owen Taylor3473f882001-02-23 17:55:21 +00004244 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004245 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4246 if (tmp == NULL) {
4247 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004248 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004249 ctxt->instate = (xmlParserInputState) state;
4250 return(NULL);
4251 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004252 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004253 }
4254 count++;
4255 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08004256 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00004257 GROW;
4258 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004259 if (ctxt->instate == XML_PARSER_EOF) {
4260 xmlFree(buf);
4261 return(NULL);
4262 }
Owen Taylor3473f882001-02-23 17:55:21 +00004263 }
4264 COPY_BUF(l,buf,len,cur);
4265 NEXTL(l);
4266 cur = CUR_CHAR(l);
4267 if (cur == 0) {
4268 GROW;
4269 SHRINK;
4270 cur = CUR_CHAR(l);
4271 }
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02004272 if (len > maxLength) {
4273 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4274 xmlFree(buf);
4275 ctxt->instate = (xmlParserInputState) state;
4276 return(NULL);
4277 }
Owen Taylor3473f882001-02-23 17:55:21 +00004278 }
4279 buf[len] = 0;
4280 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004281 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004282 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004283 } else {
4284 NEXT;
4285 }
4286 return(buf);
4287}
4288
4289/**
4290 * xmlParsePubidLiteral:
4291 * @ctxt: an XML parser context
4292 *
4293 * parse an XML public literal
4294 *
4295 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4296 *
4297 * Returns the PubidLiteral parsed or NULL.
4298 */
4299
4300xmlChar *
4301xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4302 xmlChar *buf = NULL;
4303 int len = 0;
4304 int size = XML_PARSER_BUFFER_SIZE;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02004305 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4306 XML_MAX_TEXT_LENGTH :
4307 XML_MAX_NAME_LENGTH;
Owen Taylor3473f882001-02-23 17:55:21 +00004308 xmlChar cur;
4309 xmlChar stop;
4310 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004311 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004312
4313 SHRINK;
4314 if (RAW == '"') {
4315 NEXT;
4316 stop = '"';
4317 } else if (RAW == '\'') {
4318 NEXT;
4319 stop = '\'';
4320 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004321 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004322 return(NULL);
4323 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004324 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004325 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004326 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004327 return(NULL);
4328 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004329 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004330 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004331 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004332 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004333 xmlChar *tmp;
4334
Owen Taylor3473f882001-02-23 17:55:21 +00004335 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004336 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4337 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004338 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004339 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004340 return(NULL);
4341 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004342 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004343 }
4344 buf[len++] = cur;
4345 count++;
4346 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08004347 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00004348 GROW;
4349 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004350 if (ctxt->instate == XML_PARSER_EOF) {
4351 xmlFree(buf);
4352 return(NULL);
4353 }
Owen Taylor3473f882001-02-23 17:55:21 +00004354 }
4355 NEXT;
4356 cur = CUR;
4357 if (cur == 0) {
4358 GROW;
4359 SHRINK;
4360 cur = CUR;
4361 }
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02004362 if (len > maxLength) {
4363 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4364 xmlFree(buf);
4365 return(NULL);
4366 }
Owen Taylor3473f882001-02-23 17:55:21 +00004367 }
4368 buf[len] = 0;
4369 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004370 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004371 } else {
4372 NEXT;
4373 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004374 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004375 return(buf);
4376}
4377
/*
 * Forward declaration of the slow-path character data parser, which
 * xmlParseCharData() calls when its accelerated loop cannot be used.
 */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by input byte. An entry is non-zero (equal to the byte itself)
 * for the bytes the fast loop of xmlParseCharData() may step over without
 * further inspection: TAB (0x09) and 0x20..0x7F except '&', '<' and ']'.
 * Every other byte, including LF (0x0A), CR (0x0D) and all bytes >= 0x80,
 * maps to 0 and stops the loop.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9 only; LF and CR are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4417
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero an accelerated loop scans the input buffer in place
 * and hands runs of bytes straight to the SAX characters() or
 * ignorableWhitespace() callbacks without copying them. As soon as that
 * loop meets a byte it does not handle, or when @cdata is non-zero, the
 * work is handed over to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Line/column matching ctxt->input->cur; refreshed after each delivery
     * to SAX and written back before falling through to the complex parser.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     *
     * NOTE(review): the callbacks in this fast path are invoked without
     * testing ctxt->disableSAX, whereas xmlParseCharDataComplex() does test
     * it - confirm that this difference is intended.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* Skip a leading run of spaces and line feeds. */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Only spaces and line feeds were seen before the markup:
                 * deliver them (if any) and stop.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    /* Advance before the callback runs. */
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                               tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* Step over every byte flagged in test_char_data[]. */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside of CDATA. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    /* Resume after the first ']'; nothing is delivered. */
                    ctxt->input->cur = in + 1;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver what was scanned since ctxt->input->cur. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /*
                     * The run starts with a blank and the handler
                     * distinguishes ignorable whitespace: let areBlanks()
                     * choose the callback.
                     */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                /*
                 * CR LF: step over the CR so that only the LF remains part
                 * of the next run. A CR not followed by LF is left in place.
                 */
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            /* Refresh the input window, then look at the new position. */
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
        nbchar = 0;
    }
    /* Put back the position recorded at the last delivery point. */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4575
Daniel Veillard01c13b52002-12-10 15:19:08 +00004576/**
4577 * xmlParseCharDataComplex:
4578 * @ctxt: an XML parser context
4579 * @cdata: int indicating whether we are within a CDATA section
4580 *
4581 * parse a CharData section.this is the fallback function
4582 * of xmlParseCharData() when the parsing requires handling
4583 * of non-ASCII characters.
4584 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004585static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004586xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004587 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4588 int nbchar = 0;
4589 int cur, l;
4590 int count = 0;
4591
4592 SHRINK;
4593 GROW;
4594 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004595 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004596 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004597 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004598 if ((cur == ']') && (NXT(1) == ']') &&
4599 (NXT(2) == '>')) {
4600 if (cdata) break;
4601 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004602 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004603 }
4604 }
4605 COPY_BUF(l,buf,nbchar,cur);
Elliott Hughesecdab2a2022-02-23 14:33:50 -08004606 /* move current position before possible calling of ctxt->sax->characters */
4607 NEXTL(l);
4608 cur = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00004609 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004610 buf[nbchar] = 0;
4611
Owen Taylor3473f882001-02-23 17:55:21 +00004612 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004613 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004614 */
4615 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004616 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004617 if (ctxt->sax->ignorableWhitespace != NULL)
4618 ctxt->sax->ignorableWhitespace(ctxt->userData,
4619 buf, nbchar);
4620 } else {
4621 if (ctxt->sax->characters != NULL)
4622 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004623 if ((ctxt->sax->characters !=
4624 ctxt->sax->ignorableWhitespace) &&
4625 (*ctxt->space == -1))
4626 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004627 }
4628 }
4629 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004630 /* something really bad happened in the SAX callback */
4631 if (ctxt->instate != XML_PARSER_CONTENT)
4632 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004633 }
4634 count++;
4635 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08004636 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00004637 GROW;
4638 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004639 if (ctxt->instate == XML_PARSER_EOF)
4640 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004641 }
Owen Taylor3473f882001-02-23 17:55:21 +00004642 }
4643 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004644 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004645 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004646 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004647 */
4648 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004649 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004650 if (ctxt->sax->ignorableWhitespace != NULL)
4651 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4652 } else {
4653 if (ctxt->sax->characters != NULL)
4654 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004655 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4656 (*ctxt->space == -1))
4657 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004658 }
4659 }
4660 }
Nick Wellnhofer69936b12017-08-30 14:16:01 +02004661 if ((cur != 0) && (!IS_CHAR(cur))) {
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004662 /* Generate the error and skip the offending character */
4663 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4664 "PCDATA invalid Char value %d\n",
4665 cur);
4666 NEXTL(l);
4667 }
Owen Taylor3473f882001-02-23 17:55:21 +00004668}
4669
4670/**
4671 * xmlParseExternalID:
4672 * @ctxt: an XML parser context
4673 * @publicID: a xmlChar** receiving PubidLiteral
4674 * @strict: indicate whether we should restrict parsing to only
4675 * production [75], see NOTE below
4676 *
4677 * Parse an External ID or a Public ID
4678 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004679 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004680 * 'PUBLIC' S PubidLiteral S SystemLiteral
4681 *
4682 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4683 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4684 *
4685 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4686 *
4687 * Returns the function returns SystemLiteral and in the second
4688 * case publicID receives PubidLiteral, is strict is off
4689 * it is possible to return NULL and have publicID set.
4690 */
4691
4692xmlChar *
4693xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4694 xmlChar *URI = NULL;
4695
4696 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004697
4698 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004699 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004700 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004701 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004702 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4703 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004704 }
Owen Taylor3473f882001-02-23 17:55:21 +00004705 URI = xmlParseSystemLiteral(ctxt);
4706 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004707 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004708 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004709 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004710 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004711 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004712 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004713 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004714 }
Owen Taylor3473f882001-02-23 17:55:21 +00004715 *publicID = xmlParsePubidLiteral(ctxt);
4716 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004717 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004718 }
4719 if (strict) {
4720 /*
4721 * We don't handle [83] so "S SystemLiteral" is required.
4722 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004723 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004724 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004725 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004726 }
4727 } else {
4728 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004729 * We handle [83] so we return immediately, if
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004730 * "S SystemLiteral" is not detected. We skip blanks if no
4731 * system literal was found, but this is harmless since we must
4732 * be at the end of a NotationDecl.
Owen Taylor3473f882001-02-23 17:55:21 +00004733 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004734 if (SKIP_BLANKS == 0) return(NULL);
4735 if ((CUR != '\'') && (CUR != '"')) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004736 }
Owen Taylor3473f882001-02-23 17:55:21 +00004737 URI = xmlParseSystemLiteral(ctxt);
4738 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004739 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004740 }
4741 }
4742 return(URI);
4743}
4744
4745/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004746 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004747 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004748 * @buf: the already parsed part of the buffer
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004749 * @len: number of bytes in the buffer
Daniel Veillard4c778d82005-01-23 17:37:44 +00004750 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004751 *
4752 * Skip an XML (SGML) comment <!-- .... -->
4753 * The spec says that "For compatibility, the string "--" (double-hyphen)
4754 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004755 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004756 *
4757 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4758 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004759static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004760xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4761 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004762 int q, ql;
4763 int r, rl;
4764 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004765 size_t count = 0;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02004766 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4767 XML_MAX_HUGE_LENGTH :
4768 XML_MAX_TEXT_LENGTH;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004769 int inputid;
4770
4771 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004772
Owen Taylor3473f882001-02-23 17:55:21 +00004773 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004774 len = 0;
4775 size = XML_PARSER_BUFFER_SIZE;
4776 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4777 if (buf == NULL) {
4778 xmlErrMemory(ctxt, NULL);
4779 return;
4780 }
Owen Taylor3473f882001-02-23 17:55:21 +00004781 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004782 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004783 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004784 if (q == 0)
4785 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004786 if (!IS_CHAR(q)) {
4787 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4788 "xmlParseComment: invalid xmlChar value %d\n",
4789 q);
4790 xmlFree (buf);
4791 return;
4792 }
Owen Taylor3473f882001-02-23 17:55:21 +00004793 NEXTL(ql);
4794 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004795 if (r == 0)
4796 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004797 if (!IS_CHAR(r)) {
4798 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4799 "xmlParseComment: invalid xmlChar value %d\n",
4800 q);
4801 xmlFree (buf);
4802 return;
4803 }
Owen Taylor3473f882001-02-23 17:55:21 +00004804 NEXTL(rl);
4805 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004806 if (cur == 0)
4807 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004808 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004809 ((cur != '>') ||
4810 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004811 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004812 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004813 }
4814 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004815 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004816 size_t new_size;
4817
4818 new_size = size * 2;
4819 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004820 if (new_buf == NULL) {
4821 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004822 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004823 return;
4824 }
William M. Bracka3215c72004-07-31 16:24:01 +00004825 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004826 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004827 }
4828 COPY_BUF(ql,buf,len,q);
4829 q = r;
4830 ql = rl;
4831 r = cur;
4832 rl = l;
4833
4834 count++;
4835 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08004836 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00004837 GROW;
4838 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004839 if (ctxt->instate == XML_PARSER_EOF) {
4840 xmlFree(buf);
4841 return;
4842 }
Owen Taylor3473f882001-02-23 17:55:21 +00004843 }
4844 NEXTL(l);
4845 cur = CUR_CHAR(l);
4846 if (cur == 0) {
4847 SHRINK;
4848 GROW;
4849 cur = CUR_CHAR(l);
4850 }
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02004851
4852 if (len > maxLength) {
4853 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4854 "Comment too big found", NULL);
4855 xmlFree (buf);
4856 return;
4857 }
Owen Taylor3473f882001-02-23 17:55:21 +00004858 }
4859 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004860 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004861 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004862 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004863 } else if (!IS_CHAR(cur)) {
4864 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4865 "xmlParseComment: invalid xmlChar value %d\n",
4866 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004867 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004868 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004869 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004870 "Comment doesn't start and stop in the same"
4871 " entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004872 }
4873 NEXT;
4874 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4875 (!ctxt->disableSAX))
4876 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004877 }
Daniel Veillardda629342007-08-01 07:49:06 +00004878 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004879 return;
4880not_terminated:
4881 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4882 "Comment not terminated\n", NULL);
4883 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004884 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004885}
Daniel Veillardda629342007-08-01 07:49:06 +00004886
Daniel Veillard4c778d82005-01-23 17:37:44 +00004887/**
4888 * xmlParseComment:
4889 * @ctxt: an XML parser context
4890 *
4891 * Skip an XML (SGML) comment <!-- .... -->
4892 * The spec says that "For compatibility, the string "--" (double-hyphen)
4893 * must not occur within comments. "
4894 *
4895 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4896 */
4897void
4898xmlParseComment(xmlParserCtxtPtr ctxt) {
4899 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004900 size_t size = XML_PARSER_BUFFER_SIZE;
4901 size_t len = 0;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02004902 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4903 XML_MAX_HUGE_LENGTH :
4904 XML_MAX_TEXT_LENGTH;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004905 xmlParserInputState state;
4906 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004907 size_t nbchar = 0;
4908 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004909 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004910
4911 /*
4912 * Check that there is a comment right here.
4913 */
4914 if ((RAW != '<') || (NXT(1) != '!') ||
4915 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004916 state = ctxt->instate;
4917 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004918 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004919 SKIP(4);
4920 SHRINK;
4921 GROW;
4922
4923 /*
4924 * Accelerated common case where input don't need to be
4925 * modified before passing it to the handler.
4926 */
4927 in = ctxt->input->cur;
4928 do {
4929 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004930 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004931 ctxt->input->line++; ctxt->input->col = 1;
4932 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004933 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004934 }
4935get_more:
4936 ccol = ctxt->input->col;
4937 while (((*in > '-') && (*in <= 0x7F)) ||
4938 ((*in >= 0x20) && (*in < '-')) ||
4939 (*in == 0x09)) {
4940 in++;
4941 ccol++;
4942 }
4943 ctxt->input->col = ccol;
4944 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004945 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004946 ctxt->input->line++; ctxt->input->col = 1;
4947 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004948 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004949 goto get_more;
4950 }
4951 nbchar = in - ctxt->input->cur;
4952 /*
4953 * save current set of data
4954 */
4955 if (nbchar > 0) {
4956 if ((ctxt->sax != NULL) &&
4957 (ctxt->sax->comment != NULL)) {
4958 if (buf == NULL) {
4959 if ((*in == '-') && (in[1] == '-'))
4960 size = nbchar + 1;
4961 else
4962 size = XML_PARSER_BUFFER_SIZE + nbchar;
4963 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4964 if (buf == NULL) {
4965 xmlErrMemory(ctxt, NULL);
4966 ctxt->instate = state;
4967 return;
4968 }
4969 len = 0;
4970 } else if (len + nbchar + 1 >= size) {
4971 xmlChar *new_buf;
4972 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4973 new_buf = (xmlChar *) xmlRealloc(buf,
4974 size * sizeof(xmlChar));
4975 if (new_buf == NULL) {
4976 xmlFree (buf);
4977 xmlErrMemory(ctxt, NULL);
4978 ctxt->instate = state;
4979 return;
4980 }
4981 buf = new_buf;
4982 }
4983 memcpy(&buf[len], ctxt->input->cur, nbchar);
4984 len += nbchar;
4985 buf[len] = 0;
4986 }
4987 }
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02004988 if (len > maxLength) {
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004989 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4990 "Comment too big found", NULL);
4991 xmlFree (buf);
4992 return;
4993 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004994 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004995 if (*in == 0xA) {
4996 in++;
4997 ctxt->input->line++; ctxt->input->col = 1;
4998 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004999 if (*in == 0xD) {
5000 in++;
5001 if (*in == 0xA) {
5002 ctxt->input->cur = in;
5003 in++;
5004 ctxt->input->line++; ctxt->input->col = 1;
Elliott Hughesecdab2a2022-02-23 14:33:50 -08005005 goto get_more;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005006 }
5007 in--;
5008 }
5009 SHRINK;
5010 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005011 if (ctxt->instate == XML_PARSER_EOF) {
5012 xmlFree(buf);
5013 return;
5014 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005015 in = ctxt->input->cur;
5016 if (*in == '-') {
5017 if (in[1] == '-') {
5018 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00005019 if (ctxt->input->id != inputid) {
5020 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005021 "comment doesn't start and stop in the"
5022 " same entity\n");
Daniel Veillard051d52c2008-07-29 16:44:59 +00005023 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005024 SKIP(3);
5025 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5026 (!ctxt->disableSAX)) {
5027 if (buf != NULL)
5028 ctxt->sax->comment(ctxt->userData, buf);
5029 else
5030 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5031 }
5032 if (buf != NULL)
5033 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08005034 if (ctxt->instate != XML_PARSER_EOF)
5035 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005036 return;
5037 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005038 if (buf != NULL) {
5039 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5040 "Double hyphen within comment: "
5041 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005042 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005043 } else
5044 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5045 "Double hyphen within comment\n", NULL);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07005046 if (ctxt->instate == XML_PARSER_EOF) {
5047 xmlFree(buf);
5048 return;
5049 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005050 in++;
5051 ctxt->input->col++;
5052 }
5053 in++;
5054 ctxt->input->col++;
5055 goto get_more;
5056 }
Haibo Huangd23e46c2020-10-28 22:26:09 -07005057 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
Daniel Veillard4c778d82005-01-23 17:37:44 +00005058 xmlParseCommentComplex(ctxt, buf, len, size);
5059 ctxt->instate = state;
5060 return;
5061}
5062
Owen Taylor3473f882001-02-23 17:55:21 +00005063
5064/**
5065 * xmlParsePITarget:
5066 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005067 *
Owen Taylor3473f882001-02-23 17:55:21 +00005068 * parse the name of a PI
5069 *
5070 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5071 *
5072 * Returns the PITarget name or NULL
5073 */
5074
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005075const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005076xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005077 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005078
5079 name = xmlParseName(ctxt);
5080 if ((name != NULL) &&
5081 ((name[0] == 'x') || (name[0] == 'X')) &&
5082 ((name[1] == 'm') || (name[1] == 'M')) &&
5083 ((name[2] == 'l') || (name[2] == 'L'))) {
5084 int i;
5085 if ((name[0] == 'x') && (name[1] == 'm') &&
5086 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005087 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005088 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005089 return(name);
5090 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005091 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005092 return(name);
5093 }
5094 for (i = 0;;i++) {
5095 if (xmlW3CPIs[i] == NULL) break;
5096 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5097 return(name);
5098 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005099 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5100 "xmlParsePITarget: invalid name prefix 'xml'\n",
5101 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005102 }
Daniel Veillard37334572008-07-31 08:20:02 +00005103 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005104 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005105 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005106 }
Owen Taylor3473f882001-02-23 17:55:21 +00005107 return(name);
5108}
5109
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected content is the pseudo-attribute catalog followed by '=' and
 * a value quoted with ' or ", with optional blanks around each part and
 * nothing but blanks after the closing quote. A missing '=' makes the
 * function return silently; every other syntax problem emits an
 * XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *cur = catalog;
    const xmlChar *start;
    xmlChar *uri = NULL;
    xmlChar quote;

    /* Pseudo-attribute name. */
    while (IS_BLANK_CH(*cur))
        cur++;
    if (xmlStrncmp(cur, BAD_CAST "catalog", 7) != 0)
        goto syntax_error;
    cur += 7;

    /* Equal sign: its absence is ignored without any warning. */
    while (IS_BLANK_CH(*cur))
        cur++;
    if (*cur != '=')
        return;
    cur++;

    /* Opening quote. */
    while (IS_BLANK_CH(*cur))
        cur++;
    quote = *cur;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    cur++;

    /* Quoted value, up to the matching closing quote. */
    start = cur;
    while ((*cur != 0) && (*cur != quote))
        cur++;
    if (*cur == 0)
        goto syntax_error;
    uri = xmlStrndup(start, cur - start);
    cur++;

    /* Only blanks may follow the closing quote. */
    while (IS_BLANK_CH(*cur))
        cur++;
    if (*cur != 0)
        goto syntax_error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
5171
Owen Taylor3473f882001-02-23 17:55:21 +00005172/**
5173 * xmlParsePI:
5174 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005175 *
Owen Taylor3473f882001-02-23 17:55:21 +00005176 * parse an XML Processing Instruction.
5177 *
5178 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5179 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07005180 * The processing is transferred to SAX once parsed.
Owen Taylor3473f882001-02-23 17:55:21 +00005181 */
5182
5183void
5184xmlParsePI(xmlParserCtxtPtr ctxt) {
5185 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005186 size_t len = 0;
5187 size_t size = XML_PARSER_BUFFER_SIZE;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02005188 size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5189 XML_MAX_HUGE_LENGTH :
5190 XML_MAX_TEXT_LENGTH;
Owen Taylor3473f882001-02-23 17:55:21 +00005191 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005192 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005193 xmlParserInputState state;
5194 int count = 0;
5195
5196 if ((RAW == '<') && (NXT(1) == '?')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005197 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005198 state = ctxt->instate;
5199 ctxt->instate = XML_PARSER_PI;
5200 /*
5201 * this is a Processing Instruction.
5202 */
5203 SKIP(2);
5204 SHRINK;
5205
5206 /*
5207 * Parse the target name and check for special support like
5208 * namespace.
5209 */
5210 target = xmlParsePITarget(ctxt);
5211 if (target != NULL) {
5212 if ((RAW == '?') && (NXT(1) == '>')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005213 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005214 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005215 "PI declaration doesn't start and stop in"
5216 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005217 }
5218 SKIP(2);
5219
5220 /*
5221 * SAX: PI detected.
5222 */
5223 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5224 (ctxt->sax->processingInstruction != NULL))
5225 ctxt->sax->processingInstruction(ctxt->userData,
5226 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005227 if (ctxt->instate != XML_PARSER_EOF)
5228 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005229 return;
5230 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005231 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005232 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005233 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005234 ctxt->instate = state;
5235 return;
5236 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005237 if (SKIP_BLANKS == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005238 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5239 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005240 }
Owen Taylor3473f882001-02-23 17:55:21 +00005241 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005242 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005243 ((cur != '?') || (NXT(1) != '>'))) {
5244 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005245 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005246 size_t new_size = size * 2;
5247 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005248 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005249 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005250 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005251 ctxt->instate = state;
5252 return;
5253 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005254 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005255 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005256 }
5257 count++;
5258 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08005259 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00005260 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005261 if (ctxt->instate == XML_PARSER_EOF) {
5262 xmlFree(buf);
5263 return;
5264 }
Owen Taylor3473f882001-02-23 17:55:21 +00005265 count = 0;
5266 }
5267 COPY_BUF(l,buf,len,cur);
5268 NEXTL(l);
5269 cur = CUR_CHAR(l);
5270 if (cur == 0) {
5271 SHRINK;
5272 GROW;
5273 cur = CUR_CHAR(l);
5274 }
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02005275 if (len > maxLength) {
5276 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5277 "PI %s too big found", target);
5278 xmlFree(buf);
5279 ctxt->instate = state;
5280 return;
5281 }
Owen Taylor3473f882001-02-23 17:55:21 +00005282 }
5283 buf[len] = 0;
5284 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005285 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5286 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005287 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005288 if (inputid != ctxt->input->id) {
5289 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5290 "PI declaration doesn't start and stop in"
5291 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005292 }
5293 SKIP(2);
5294
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005295#ifdef LIBXML_CATALOG_ENABLED
5296 if (((state == XML_PARSER_MISC) ||
5297 (state == XML_PARSER_START)) &&
5298 (xmlStrEqual(target, XML_CATALOG_PI))) {
5299 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5300 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5301 (allow == XML_CATA_ALLOW_ALL))
5302 xmlParseCatalogPI(ctxt, buf);
5303 }
5304#endif
5305
5306
Owen Taylor3473f882001-02-23 17:55:21 +00005307 /*
5308 * SAX: PI detected.
5309 */
5310 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5311 (ctxt->sax->processingInstruction != NULL))
5312 ctxt->sax->processingInstruction(ctxt->userData,
5313 target, buf);
5314 }
5315 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005316 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005317 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005318 }
Chris Evans77404b82011-12-14 16:18:25 +08005319 if (ctxt->instate != XML_PARSER_EOF)
5320 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005321 }
5322}
5323
5324/**
5325 * xmlParseNotationDecl:
5326 * @ctxt: an XML parser context
5327 *
5328 * parse a notation declaration
5329 *
5330 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5331 *
5332 * Hence there is actually 3 choices:
5333 * 'PUBLIC' S PubidLiteral
5334 * 'PUBLIC' S PubidLiteral S SystemLiteral
5335 * and 'SYSTEM' S SystemLiteral
5336 *
5337 * See the NOTE on xmlParseExternalID().
5338 */
5339
5340void
5341xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005342 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005343 xmlChar *Pubid;
5344 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005345
Daniel Veillarda07050d2003-10-19 14:46:32 +00005346 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005347 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005348 SHRINK;
5349 SKIP(10);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005350 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5352 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005353 return;
5354 }
Owen Taylor3473f882001-02-23 17:55:21 +00005355
Daniel Veillard76d66f42001-05-16 21:05:17 +00005356 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005357 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005358 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005359 return;
5360 }
Daniel Veillard37334572008-07-31 08:20:02 +00005361 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005362 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005363 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005364 name, NULL, NULL);
5365 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005366 if (SKIP_BLANKS == 0) {
5367 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5368 "Space required after the NOTATION name'\n");
5369 return;
5370 }
Owen Taylor3473f882001-02-23 17:55:21 +00005371
5372 /*
5373 * Parse the IDs.
5374 */
5375 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5376 SKIP_BLANKS;
5377
5378 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005379 if (inputid != ctxt->input->id) {
5380 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5381 "Notation declaration doesn't start and stop"
5382 " in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005383 }
5384 NEXT;
5385 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5386 (ctxt->sax->notationDecl != NULL))
5387 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5388 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005389 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005390 }
Owen Taylor3473f882001-02-23 17:55:21 +00005391 if (Systemid != NULL) xmlFree(Systemid);
5392 if (Pubid != NULL) xmlFree(Pubid);
5393 }
5394}
5395
5396/**
5397 * xmlParseEntityDecl:
5398 * @ctxt: an XML parser context
5399 *
5400 * parse <!ENTITY declarations
5401 *
5402 * [70] EntityDecl ::= GEDecl | PEDecl
5403 *
5404 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5405 *
5406 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5407 *
5408 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5409 *
5410 * [74] PEDef ::= EntityValue | ExternalID
5411 *
5412 * [76] NDataDecl ::= S 'NDATA' S Name
5413 *
5414 * [ VC: Notation Declared ]
5415 * The Name must match the declared name of a notation.
5416 */
5417
5418void
5419xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005420 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005421 xmlChar *value = NULL;
5422 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005423 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005424 int isParameter = 0;
5425 xmlChar *orig = NULL;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005426
Daniel Veillard4c778d82005-01-23 17:37:44 +00005427 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005428 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005429 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005430 SHRINK;
5431 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005432 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005433 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5434 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005435 }
Owen Taylor3473f882001-02-23 17:55:21 +00005436
5437 if (RAW == '%') {
5438 NEXT;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005439 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005440 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
David Kilzer4472c3a2016-05-13 15:13:17 +08005441 "Space required after '%%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005442 }
Owen Taylor3473f882001-02-23 17:55:21 +00005443 isParameter = 1;
5444 }
5445
Daniel Veillard76d66f42001-05-16 21:05:17 +00005446 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005447 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005448 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5449 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005450 return;
5451 }
Daniel Veillard37334572008-07-31 08:20:02 +00005452 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005453 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005454 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005455 name, NULL, NULL);
5456 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005457 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005458 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5459 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005460 }
Owen Taylor3473f882001-02-23 17:55:21 +00005461
Daniel Veillardf5582f12002-06-11 10:08:16 +00005462 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005463 /*
5464 * handle the various case of definitions...
5465 */
5466 if (isParameter) {
5467 if ((RAW == '"') || (RAW == '\'')) {
5468 value = xmlParseEntityValue(ctxt, &orig);
5469 if (value) {
5470 if ((ctxt->sax != NULL) &&
5471 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5472 ctxt->sax->entityDecl(ctxt->userData, name,
5473 XML_INTERNAL_PARAMETER_ENTITY,
5474 NULL, NULL, value);
5475 }
5476 } else {
5477 URI = xmlParseExternalID(ctxt, &literal, 1);
5478 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005479 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005480 }
5481 if (URI) {
5482 xmlURIPtr uri;
5483
5484 uri = xmlParseURI((const char *) URI);
5485 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005486 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5487 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005488 /*
5489 * This really ought to be a well formedness error
5490 * but the XML Core WG decided otherwise c.f. issue
5491 * E26 of the XML erratas.
5492 */
Owen Taylor3473f882001-02-23 17:55:21 +00005493 } else {
5494 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005495 /*
5496 * Okay this is foolish to block those but not
5497 * invalid URIs.
5498 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005499 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005500 } else {
5501 if ((ctxt->sax != NULL) &&
5502 (!ctxt->disableSAX) &&
5503 (ctxt->sax->entityDecl != NULL))
5504 ctxt->sax->entityDecl(ctxt->userData, name,
5505 XML_EXTERNAL_PARAMETER_ENTITY,
5506 literal, URI, NULL);
5507 }
5508 xmlFreeURI(uri);
5509 }
5510 }
5511 }
5512 } else {
5513 if ((RAW == '"') || (RAW == '\'')) {
5514 value = xmlParseEntityValue(ctxt, &orig);
5515 if ((ctxt->sax != NULL) &&
5516 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5517 ctxt->sax->entityDecl(ctxt->userData, name,
5518 XML_INTERNAL_GENERAL_ENTITY,
5519 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005520 /*
5521 * For expat compatibility in SAX mode.
5522 */
5523 if ((ctxt->myDoc == NULL) ||
5524 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5525 if (ctxt->myDoc == NULL) {
5526 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005527 if (ctxt->myDoc == NULL) {
5528 xmlErrMemory(ctxt, "New Doc failed");
5529 return;
5530 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005531 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005532 }
5533 if (ctxt->myDoc->intSubset == NULL)
5534 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5535 BAD_CAST "fake", NULL, NULL);
5536
Daniel Veillard1af9a412003-08-20 22:54:39 +00005537 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5538 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005539 }
Owen Taylor3473f882001-02-23 17:55:21 +00005540 } else {
5541 URI = xmlParseExternalID(ctxt, &literal, 1);
5542 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005543 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005544 }
5545 if (URI) {
5546 xmlURIPtr uri;
5547
5548 uri = xmlParseURI((const char *)URI);
5549 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005550 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5551 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005552 /*
5553 * This really ought to be a well formedness error
5554 * but the XML Core WG decided otherwise c.f. issue
5555 * E26 of the XML erratas.
5556 */
Owen Taylor3473f882001-02-23 17:55:21 +00005557 } else {
5558 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005559 /*
5560 * Okay this is foolish to block those but not
5561 * invalid URIs.
5562 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005563 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005564 }
5565 xmlFreeURI(uri);
5566 }
5567 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005568 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005569 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5570 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005571 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005572 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005573 SKIP(5);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005574 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005575 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5576 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005577 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005578 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005579 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5580 (ctxt->sax->unparsedEntityDecl != NULL))
5581 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5582 literal, URI, ndata);
5583 } else {
5584 if ((ctxt->sax != NULL) &&
5585 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5586 ctxt->sax->entityDecl(ctxt->userData, name,
5587 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5588 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005589 /*
5590 * For expat compatibility in SAX mode.
Haibo Huangcfd91dc2020-07-30 23:01:33 -07005591 * assuming the entity replacement was asked for
Daniel Veillard5997aca2002-03-18 18:36:20 +00005592 */
5593 if ((ctxt->replaceEntities != 0) &&
5594 ((ctxt->myDoc == NULL) ||
5595 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5596 if (ctxt->myDoc == NULL) {
5597 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005598 if (ctxt->myDoc == NULL) {
5599 xmlErrMemory(ctxt, "New Doc failed");
5600 return;
5601 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005602 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005603 }
5604
5605 if (ctxt->myDoc->intSubset == NULL)
5606 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5607 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005608 xmlSAX2EntityDecl(ctxt, name,
5609 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5610 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005611 }
Owen Taylor3473f882001-02-23 17:55:21 +00005612 }
5613 }
5614 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005615 if (ctxt->instate == XML_PARSER_EOF)
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005616 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00005617 SKIP_BLANKS;
5618 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005619 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005620 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005621 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005622 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005623 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005624 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005625 "Entity declaration doesn't start and stop in"
5626 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005627 }
5628 NEXT;
5629 }
5630 if (orig != NULL) {
5631 /*
5632 * Ugly mechanism to save the raw entity value.
5633 */
5634 xmlEntityPtr cur = NULL;
5635
5636 if (isParameter) {
5637 if ((ctxt->sax != NULL) &&
5638 (ctxt->sax->getParameterEntity != NULL))
5639 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5640 } else {
5641 if ((ctxt->sax != NULL) &&
5642 (ctxt->sax->getEntity != NULL))
5643 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005644 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005645 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005646 }
Owen Taylor3473f882001-02-23 17:55:21 +00005647 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005648 if ((cur != NULL) && (cur->orig == NULL)) {
5649 cur->orig = orig;
5650 orig = NULL;
5651 }
Owen Taylor3473f882001-02-23 17:55:21 +00005652 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005653
5654done:
Owen Taylor3473f882001-02-23 17:55:21 +00005655 if (value != NULL) xmlFree(value);
5656 if (URI != NULL) xmlFree(URI);
5657 if (literal != NULL) xmlFree(literal);
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005658 if (orig != NULL) xmlFree(orig);
Owen Taylor3473f882001-02-23 17:55:21 +00005659 }
5660}
5661
5662/**
5663 * xmlParseDefaultDecl:
5664 * @ctxt: an XML parser context
5665 * @value: Receive a possible fixed default value for the attribute
5666 *
5667 * Parse an attribute default declaration
5668 *
5669 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5670 *
5671 * [ VC: Required Attribute ]
5672 * if the default declaration is the keyword #REQUIRED, then the
5673 * attribute must be specified for all elements of the type in the
5674 * attribute-list declaration.
5675 *
5676 * [ VC: Attribute Default Legal ]
5677 * The declared default value must meet the lexical constraints of
5678 * the declared attribute type c.f. xmlValidateAttributeDecl()
5679 *
5680 * [ VC: Fixed Attribute Default ]
5681 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005682 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005683 *
5684 * [ WFC: No < in Attribute Values ]
5685 * handled in xmlParseAttValue()
5686 *
5687 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005688 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005689 */
5690
5691int
5692xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5693 int val;
5694 xmlChar *ret;
5695
5696 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005697 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005698 SKIP(9);
5699 return(XML_ATTRIBUTE_REQUIRED);
5700 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005701 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005702 SKIP(8);
5703 return(XML_ATTRIBUTE_IMPLIED);
5704 }
5705 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005706 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005707 SKIP(6);
5708 val = XML_ATTRIBUTE_FIXED;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005709 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005710 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5711 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005712 }
Owen Taylor3473f882001-02-23 17:55:21 +00005713 }
5714 ret = xmlParseAttValue(ctxt);
5715 ctxt->instate = XML_PARSER_DTD;
5716 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005717 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005718 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005719 } else
5720 *value = ret;
5721 return(val);
5722}
5723
5724/**
5725 * xmlParseNotationType:
5726 * @ctxt: an XML parser context
5727 *
5728 * parse an Notation attribute type.
5729 *
5730 * Note: the leading 'NOTATION' S part has already being parsed...
5731 *
5732 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5733 *
5734 * [ VC: Notation Attributes ]
5735 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005736 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005737 *
5738 * Returns: the notation attribute tree built while parsing
5739 */
5740
5741xmlEnumerationPtr
5742xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005743 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005744 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005745
5746 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005747 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005748 return(NULL);
5749 }
5750 SHRINK;
5751 do {
5752 NEXT;
5753 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005754 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005755 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005756 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5757 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005758 xmlFreeEnumeration(ret);
5759 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005760 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005761 tmp = ret;
5762 while (tmp != NULL) {
5763 if (xmlStrEqual(name, tmp->name)) {
5764 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5765 "standalone: attribute notation value token %s duplicated\n",
5766 name, NULL);
5767 if (!xmlDictOwns(ctxt->dict, name))
5768 xmlFree((xmlChar *) name);
5769 break;
5770 }
5771 tmp = tmp->next;
5772 }
5773 if (tmp == NULL) {
5774 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005775 if (cur == NULL) {
5776 xmlFreeEnumeration(ret);
5777 return(NULL);
5778 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005779 if (last == NULL) ret = last = cur;
5780 else {
5781 last->next = cur;
5782 last = cur;
5783 }
Owen Taylor3473f882001-02-23 17:55:21 +00005784 }
5785 SKIP_BLANKS;
5786 } while (RAW == '|');
5787 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005788 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005789 xmlFreeEnumeration(ret);
5790 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005791 }
5792 NEXT;
5793 return(ret);
5794}
5795
5796/**
5797 * xmlParseEnumerationType:
5798 * @ctxt: an XML parser context
5799 *
5800 * parse an Enumeration attribute type.
5801 *
5802 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5803 *
5804 * [ VC: Enumeration ]
5805 * Values of this type must match one of the Nmtoken tokens in
5806 * the declaration
5807 *
5808 * Returns: the enumeration attribute tree built while parsing
5809 */
5810
5811xmlEnumerationPtr
5812xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5813 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005814 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005815
5816 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005817 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005818 return(NULL);
5819 }
5820 SHRINK;
5821 do {
5822 NEXT;
5823 SKIP_BLANKS;
5824 name = xmlParseNmtoken(ctxt);
5825 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005826 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005827 return(ret);
5828 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005829 tmp = ret;
5830 while (tmp != NULL) {
5831 if (xmlStrEqual(name, tmp->name)) {
5832 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5833 "standalone: attribute enumeration value token %s duplicated\n",
5834 name, NULL);
5835 if (!xmlDictOwns(ctxt->dict, name))
5836 xmlFree(name);
5837 break;
5838 }
5839 tmp = tmp->next;
5840 }
5841 if (tmp == NULL) {
5842 cur = xmlCreateEnumeration(name);
5843 if (!xmlDictOwns(ctxt->dict, name))
5844 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005845 if (cur == NULL) {
5846 xmlFreeEnumeration(ret);
5847 return(NULL);
5848 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005849 if (last == NULL) ret = last = cur;
5850 else {
5851 last->next = cur;
5852 last = cur;
5853 }
Owen Taylor3473f882001-02-23 17:55:21 +00005854 }
5855 SKIP_BLANKS;
5856 } while (RAW == '|');
5857 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005858 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005859 return(ret);
5860 }
5861 NEXT;
5862 return(ret);
5863}
5864
5865/**
5866 * xmlParseEnumeratedType:
5867 * @ctxt: an XML parser context
5868 * @tree: the enumeration tree built while parsing
5869 *
5870 * parse an Enumerated attribute type.
5871 *
5872 * [57] EnumeratedType ::= NotationType | Enumeration
5873 *
5874 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5875 *
5876 *
5877 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5878 */
5879
5880int
5881xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005882 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005883 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005884 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005885 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5886 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005887 return(0);
5888 }
Owen Taylor3473f882001-02-23 17:55:21 +00005889 *tree = xmlParseNotationType(ctxt);
5890 if (*tree == NULL) return(0);
5891 return(XML_ATTRIBUTE_NOTATION);
5892 }
5893 *tree = xmlParseEnumerationType(ctxt);
5894 if (*tree == NULL) return(0);
5895 return(XML_ATTRIBUTE_ENUMERATION);
5896}
5897
5898/**
5899 * xmlParseAttributeType:
5900 * @ctxt: an XML parser context
5901 * @tree: the enumeration tree built while parsing
5902 *
5903 * parse the Attribute list def for an element
5904 *
5905 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5906 *
5907 * [55] StringType ::= 'CDATA'
5908 *
5909 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5910 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5911 *
5912 * Validity constraints for attribute values syntax are checked in
5913 * xmlValidateAttributeValue()
5914 *
5915 * [ VC: ID ]
5916 * Values of type ID must match the Name production. A name must not
5917 * appear more than once in an XML document as a value of this type;
5918 * i.e., ID values must uniquely identify the elements which bear them.
5919 *
5920 * [ VC: One ID per Element Type ]
5921 * No element type may have more than one ID attribute specified.
5922 *
5923 * [ VC: ID Attribute Default ]
5924 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5925 *
5926 * [ VC: IDREF ]
5927 * Values of type IDREF must match the Name production, and values
5928 * of type IDREFS must match Names; each IDREF Name must match the value
5929 * of an ID attribute on some element in the XML document; i.e. IDREF
5930 * values must match the value of some ID attribute.
5931 *
5932 * [ VC: Entity Name ]
5933 * Values of type ENTITY must match the Name production, values
5934 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005935 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005936 *
5937 * [ VC: Name Token ]
5938 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005939 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005940 *
5941 * Returns the attribute type
5942 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005943int
Owen Taylor3473f882001-02-23 17:55:21 +00005944xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5945 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005946 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005947 SKIP(5);
5948 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005949 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005950 SKIP(6);
5951 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005952 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005953 SKIP(5);
5954 return(XML_ATTRIBUTE_IDREF);
5955 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5956 SKIP(2);
5957 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005958 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005959 SKIP(6);
5960 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005961 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005962 SKIP(8);
5963 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005964 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005965 SKIP(8);
5966 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005967 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005968 SKIP(7);
5969 return(XML_ATTRIBUTE_NMTOKEN);
5970 }
5971 return(xmlParseEnumeratedType(ctxt, tree));
5972}
5973
5974/**
5975 * xmlParseAttributeListDecl:
5976 * @ctxt: an XML parser context
5977 *
5978 * : parse the Attribute list def for an element
5979 *
5980 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5981 *
5982 * [53] AttDef ::= S Name S AttType S DefaultDecl
5983 *
5984 */
5985void
5986xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005987 const xmlChar *elemName;
5988 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005989 xmlEnumerationPtr tree;
5990
Daniel Veillarda07050d2003-10-19 14:46:32 +00005991 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005992 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005993
5994 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005995 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005996 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005997 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005998 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005999 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006000 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006001 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6002 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006003 return;
6004 }
6005 SKIP_BLANKS;
6006 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006007 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006008 int type;
6009 int def;
6010 xmlChar *defaultValue = NULL;
6011
6012 GROW;
6013 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006014 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006015 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006016 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6017 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006018 break;
6019 }
6020 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006021 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006022 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006023 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006024 break;
6025 }
Owen Taylor3473f882001-02-23 17:55:21 +00006026
6027 type = xmlParseAttributeType(ctxt, &tree);
6028 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006029 break;
6030 }
6031
6032 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006033 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006034 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6035 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006036 if (tree != NULL)
6037 xmlFreeEnumeration(tree);
6038 break;
6039 }
Owen Taylor3473f882001-02-23 17:55:21 +00006040
6041 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6042 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006043 if (defaultValue != NULL)
6044 xmlFree(defaultValue);
6045 if (tree != NULL)
6046 xmlFreeEnumeration(tree);
6047 break;
6048 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006049 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6050 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006051
6052 GROW;
6053 if (RAW != '>') {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006054 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006055 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006056 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006057 if (defaultValue != NULL)
6058 xmlFree(defaultValue);
6059 if (tree != NULL)
6060 xmlFreeEnumeration(tree);
6061 break;
6062 }
Owen Taylor3473f882001-02-23 17:55:21 +00006063 }
Owen Taylor3473f882001-02-23 17:55:21 +00006064 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6065 (ctxt->sax->attributeDecl != NULL))
6066 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6067 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006068 else if (tree != NULL)
6069 xmlFreeEnumeration(tree);
6070
6071 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006072 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006073 (def != XML_ATTRIBUTE_REQUIRED)) {
6074 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6075 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006076 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006077 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6078 }
Owen Taylor3473f882001-02-23 17:55:21 +00006079 if (defaultValue != NULL)
6080 xmlFree(defaultValue);
6081 GROW;
6082 }
6083 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006084 if (inputid != ctxt->input->id) {
6085 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6086 "Attribute list declaration doesn't start and"
6087 " stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006088 }
6089 NEXT;
6090 }
Owen Taylor3473f882001-02-23 17:55:21 +00006091 }
6092}
6093
6094/**
6095 * xmlParseElementMixedContentDecl:
6096 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006097 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006098 *
6099 * parse the declaration for a Mixed Element content
6100 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006101 *
Owen Taylor3473f882001-02-23 17:55:21 +00006102 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6103 * '(' S? '#PCDATA' S? ')'
6104 *
6105 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6106 *
6107 * [ VC: No Duplicate Types ]
6108 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006109 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006110 *
6111 * returns: the list of the xmlElementContentPtr describing the element choices
6112 */
6113xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006114xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006115 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006116 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006117
6118 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006119 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006120 SKIP(7);
6121 SKIP_BLANKS;
6122 SHRINK;
6123 if (RAW == ')') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006124 if (ctxt->input->id != inputchk) {
6125 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6126 "Element content declaration doesn't start and"
6127 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006128 }
Owen Taylor3473f882001-02-23 17:55:21 +00006129 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006130 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006131 if (ret == NULL)
6132 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006133 if (RAW == '*') {
6134 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6135 NEXT;
6136 }
6137 return(ret);
6138 }
6139 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006140 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006141 if (ret == NULL) return(NULL);
6142 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006143 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006144 NEXT;
6145 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006146 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Haibo Huangd75f3892021-01-05 21:34:50 -08006147 if (ret == NULL) {
6148 xmlFreeDocElementContent(ctxt->myDoc, cur);
6149 return(NULL);
6150 }
Owen Taylor3473f882001-02-23 17:55:21 +00006151 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006152 if (cur != NULL)
6153 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006154 cur = ret;
6155 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006156 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Haibo Huangd75f3892021-01-05 21:34:50 -08006157 if (n == NULL) {
6158 xmlFreeDocElementContent(ctxt->myDoc, ret);
6159 return(NULL);
6160 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006161 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006162 if (n->c1 != NULL)
6163 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006164 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006165 if (n != NULL)
6166 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006167 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006168 }
6169 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006170 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006171 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006172 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006173 "xmlParseElementMixedContentDecl : Name expected\n");
Nick Wellnhofer8627e4e2017-05-23 18:11:08 +02006174 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006175 return(NULL);
6176 }
6177 SKIP_BLANKS;
6178 GROW;
6179 }
6180 if ((RAW == ')') && (NXT(1) == '*')) {
6181 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006182 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006183 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006184 if (cur->c2 != NULL)
6185 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006186 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006187 if (ret != NULL)
6188 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006189 if (ctxt->input->id != inputchk) {
6190 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6191 "Element content declaration doesn't start and"
6192 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006193 }
Owen Taylor3473f882001-02-23 17:55:21 +00006194 SKIP(2);
6195 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006196 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006197 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006198 return(NULL);
6199 }
6200
6201 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006202 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006203 }
6204 return(ret);
6205}
6206
6207/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006208 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006209 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006210 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006211 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006212 *
6213 * parse the declaration for a Mixed Element content
6214 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006215 *
Owen Taylor3473f882001-02-23 17:55:21 +00006216 *
6217 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6218 *
6219 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6220 *
6221 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6222 *
6223 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6224 *
6225 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6226 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006227 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006228 * opening or closing parentheses in a choice, seq, or Mixed
6229 * construct is contained in the replacement text for a parameter
6230 * entity, both must be contained in the same replacement text. For
6231 * interoperability, if a parameter-entity reference appears in a
6232 * choice, seq, or Mixed construct, its replacement text should not
6233 * be empty, and neither the first nor last non-blank character of
6234 * the replacement text should be a connector (| or ,).
6235 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006236 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006237 * hierarchy.
6238 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006239static xmlElementContentPtr
6240xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6241 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006242 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006243 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006244 xmlChar type = 0;
6245
Daniel Veillard489f9672009-08-10 16:49:30 +02006246 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6247 (depth > 2048)) {
6248 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6249"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6250 depth);
6251 return(NULL);
6252 }
Owen Taylor3473f882001-02-23 17:55:21 +00006253 SKIP_BLANKS;
6254 GROW;
6255 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006256 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006257
Owen Taylor3473f882001-02-23 17:55:21 +00006258 /* Recurse on first child */
6259 NEXT;
6260 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006261 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6262 depth + 1);
Elliott Hughes5cefca72021-05-06 13:23:15 -07006263 if (cur == NULL)
6264 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006265 SKIP_BLANKS;
6266 GROW;
6267 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006268 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006269 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006270 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006271 return(NULL);
6272 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006273 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006274 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006275 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006276 return(NULL);
6277 }
Owen Taylor3473f882001-02-23 17:55:21 +00006278 GROW;
6279 if (RAW == '?') {
6280 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6281 NEXT;
6282 } else if (RAW == '*') {
6283 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6284 NEXT;
6285 } else if (RAW == '+') {
6286 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6287 NEXT;
6288 } else {
6289 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6290 }
Owen Taylor3473f882001-02-23 17:55:21 +00006291 GROW;
6292 }
6293 SKIP_BLANKS;
6294 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006295 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006296 /*
6297 * Each loop we parse one separator and one element.
6298 */
6299 if (RAW == ',') {
6300 if (type == 0) type = CUR;
6301
6302 /*
6303 * Detect "Name | Name , Name" error
6304 */
6305 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006306 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006307 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006308 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006309 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006310 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006311 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006312 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006313 return(NULL);
6314 }
6315 NEXT;
6316
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006317 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006318 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006319 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006320 xmlFreeDocElementContent(ctxt->myDoc, last);
6321 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006322 return(NULL);
6323 }
6324 if (last == NULL) {
6325 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006326 if (ret != NULL)
6327 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006328 ret = cur = op;
6329 } else {
6330 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006331 if (op != NULL)
6332 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006333 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006334 if (last != NULL)
6335 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006336 cur =op;
6337 last = NULL;
6338 }
6339 } else if (RAW == '|') {
6340 if (type == 0) type = CUR;
6341
6342 /*
6343 * Detect "Name , Name | Name" error
6344 */
6345 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006346 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006347 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006348 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006349 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006350 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006351 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006352 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006353 return(NULL);
6354 }
6355 NEXT;
6356
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006357 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006358 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006359 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006360 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006361 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006362 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006363 return(NULL);
6364 }
6365 if (last == NULL) {
6366 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006367 if (ret != NULL)
6368 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006369 ret = cur = op;
6370 } else {
6371 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006372 if (op != NULL)
6373 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006374 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006375 if (last != NULL)
6376 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006377 cur =op;
6378 last = NULL;
6379 }
6380 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006381 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006382 if ((last != NULL) && (last != ret))
6383 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006384 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006385 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006386 return(NULL);
6387 }
6388 GROW;
6389 SKIP_BLANKS;
6390 GROW;
6391 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006392 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006393 /* Recurse on second child */
6394 NEXT;
6395 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006396 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6397 depth + 1);
Elliott Hughes5cefca72021-05-06 13:23:15 -07006398 if (last == NULL) {
6399 if (ret != NULL)
6400 xmlFreeDocElementContent(ctxt->myDoc, ret);
6401 return(NULL);
6402 }
Owen Taylor3473f882001-02-23 17:55:21 +00006403 SKIP_BLANKS;
6404 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006405 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006406 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006407 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006408 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006409 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006410 return(NULL);
6411 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006412 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006413 if (last == NULL) {
6414 if (ret != NULL)
6415 xmlFreeDocElementContent(ctxt->myDoc, ret);
6416 return(NULL);
6417 }
Owen Taylor3473f882001-02-23 17:55:21 +00006418 if (RAW == '?') {
6419 last->ocur = XML_ELEMENT_CONTENT_OPT;
6420 NEXT;
6421 } else if (RAW == '*') {
6422 last->ocur = XML_ELEMENT_CONTENT_MULT;
6423 NEXT;
6424 } else if (RAW == '+') {
6425 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6426 NEXT;
6427 } else {
6428 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6429 }
6430 }
6431 SKIP_BLANKS;
6432 GROW;
6433 }
6434 if ((cur != NULL) && (last != NULL)) {
6435 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006436 if (last != NULL)
6437 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006438 }
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006439 if (ctxt->input->id != inputchk) {
6440 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6441 "Element content declaration doesn't start and stop in"
6442 " the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006443 }
Owen Taylor3473f882001-02-23 17:55:21 +00006444 NEXT;
6445 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006446 if (ret != NULL) {
6447 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6448 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6449 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6450 else
6451 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6452 }
Owen Taylor3473f882001-02-23 17:55:21 +00006453 NEXT;
6454 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006455 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006456 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006457 cur = ret;
6458 /*
6459 * Some normalization:
6460 * (a | b* | c?)* == (a | b | c)*
6461 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006462 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006463 if ((cur->c1 != NULL) &&
6464 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6465 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6466 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6467 if ((cur->c2 != NULL) &&
6468 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6469 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6470 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6471 cur = cur->c2;
6472 }
6473 }
Owen Taylor3473f882001-02-23 17:55:21 +00006474 NEXT;
6475 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006476 if (ret != NULL) {
6477 int found = 0;
6478
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006479 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6480 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6481 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006482 else
6483 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006484 /*
6485 * Some normalization:
6486 * (a | b*)+ == (a | b)*
6487 * (a | b?)+ == (a | b)*
6488 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006489 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006490 if ((cur->c1 != NULL) &&
6491 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6492 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6493 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6494 found = 1;
6495 }
6496 if ((cur->c2 != NULL) &&
6497 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6498 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6499 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6500 found = 1;
6501 }
6502 cur = cur->c2;
6503 }
6504 if (found)
6505 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6506 }
Owen Taylor3473f882001-02-23 17:55:21 +00006507 NEXT;
6508 }
6509 return(ret);
6510}
6511
6512/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006513 * xmlParseElementChildrenContentDecl:
6514 * @ctxt: an XML parser context
6515 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006516 *
6517 * parse the declaration for a Mixed Element content
6518 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6519 *
6520 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6521 *
6522 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6523 *
6524 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6525 *
6526 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6527 *
6528 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6529 * TODO Parameter-entity replacement text must be properly nested
6530 * with parenthesized groups. That is to say, if either of the
6531 * opening or closing parentheses in a choice, seq, or Mixed
6532 * construct is contained in the replacement text for a parameter
6533 * entity, both must be contained in the same replacement text. For
6534 * interoperability, if a parameter-entity reference appears in a
6535 * choice, seq, or Mixed construct, its replacement text should not
6536 * be empty, and neither the first nor last non-blank character of
6537 * the replacement text should be a connector (| or ,).
6538 *
6539 * Returns the tree of xmlElementContentPtr describing the element
6540 * hierarchy.
6541 */
6542xmlElementContentPtr
6543xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6544 /* stub left for API/ABI compat */
6545 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6546}
6547
6548/**
Owen Taylor3473f882001-02-23 17:55:21 +00006549 * xmlParseElementContentDecl:
6550 * @ctxt: an XML parser context
6551 * @name: the name of the element being defined.
6552 * @result: the Element Content pointer will be stored here if any
6553 *
6554 * parse the declaration for an Element content either Mixed or Children,
6555 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006556 *
Owen Taylor3473f882001-02-23 17:55:21 +00006557 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6558 *
6559 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6560 */
6561
6562int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006563xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006564 xmlElementContentPtr *result) {
6565
6566 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006567 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006568 int res;
6569
6570 *result = NULL;
6571
6572 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006573 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006574 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006575 return(-1);
6576 }
6577 NEXT;
6578 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006579 if (ctxt->instate == XML_PARSER_EOF)
6580 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006581 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006582 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006583 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006584 res = XML_ELEMENT_TYPE_MIXED;
6585 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006586 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006587 res = XML_ELEMENT_TYPE_ELEMENT;
6588 }
Owen Taylor3473f882001-02-23 17:55:21 +00006589 SKIP_BLANKS;
6590 *result = tree;
6591 return(res);
6592}
6593
6594/**
6595 * xmlParseElementDecl:
6596 * @ctxt: an XML parser context
6597 *
6598 * parse an Element declaration.
6599 *
6600 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6601 *
6602 * [ VC: Unique Element Type Declaration ]
6603 * No element type may be declared more than once
6604 *
6605 * Returns the type of the element, or -1 in case of error
6606 */
6607int
6608xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006609 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006610 int ret = -1;
6611 xmlElementContentPtr content = NULL;
6612
Daniel Veillard4c778d82005-01-23 17:37:44 +00006613 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006614 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006615 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006616
6617 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006618 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006619 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6620 "Space required after 'ELEMENT'\n");
David Kilzer00906752016-01-26 16:57:03 -08006621 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006622 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00006623 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006624 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006625 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6626 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006627 return(-1);
6628 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006629 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006630 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6631 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006632 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00006633 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006634 SKIP(5);
6635 /*
6636 * Element must always be empty.
6637 */
6638 ret = XML_ELEMENT_TYPE_EMPTY;
6639 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6640 (NXT(2) == 'Y')) {
6641 SKIP(3);
6642 /*
6643 * Element is a generic container.
6644 */
6645 ret = XML_ELEMENT_TYPE_ANY;
6646 } else if (RAW == '(') {
6647 ret = xmlParseElementContentDecl(ctxt, name, &content);
6648 } else {
6649 /*
6650 * [ WFC: PEs in Internal Subset ] error handling.
6651 */
6652 if ((RAW == '%') && (ctxt->external == 0) &&
6653 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006654 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006655 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006656 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006657 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006658 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6659 }
Owen Taylor3473f882001-02-23 17:55:21 +00006660 return(-1);
6661 }
6662
6663 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006664
6665 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006666 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006667 if (content != NULL) {
6668 xmlFreeDocElementContent(ctxt->myDoc, content);
6669 }
Owen Taylor3473f882001-02-23 17:55:21 +00006670 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006671 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006672 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006673 "Element declaration doesn't start and stop in"
6674 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006675 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006676
Owen Taylor3473f882001-02-23 17:55:21 +00006677 NEXT;
6678 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006679 (ctxt->sax->elementDecl != NULL)) {
6680 if (content != NULL)
6681 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006682 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6683 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006684 if ((content != NULL) && (content->parent == NULL)) {
6685 /*
6686 * this is a trick: if xmlAddElementDecl is called,
6687 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006688 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006689 * interfaces or change the API/ABI
6690 */
6691 xmlFreeDocElementContent(ctxt->myDoc, content);
6692 }
6693 } else if (content != NULL) {
6694 xmlFreeDocElementContent(ctxt->myDoc, content);
6695 }
Owen Taylor3473f882001-02-23 17:55:21 +00006696 }
Owen Taylor3473f882001-02-23 17:55:21 +00006697 }
6698 return(ret);
6699}
6700
6701/**
Owen Taylor3473f882001-02-23 17:55:21 +00006702 * xmlParseConditionalSections
6703 * @ctxt: an XML parser context
6704 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006705 * [61] conditionalSect ::= includeSect | ignoreSect
6706 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006707 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6708 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6709 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6710 */
6711
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006712static void
Owen Taylor3473f882001-02-23 17:55:21 +00006713xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006714 int *inputIds = NULL;
6715 size_t inputIdsSize = 0;
6716 size_t depth = 0;
Daniel Veillard49d44052008-08-27 19:57:06 +00006717
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006718 while (ctxt->instate != XML_PARSER_EOF) {
6719 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6720 int id = ctxt->input->id;
6721
6722 SKIP(3);
6723 SKIP_BLANKS;
6724
6725 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6726 SKIP(7);
6727 SKIP_BLANKS;
6728 if (RAW != '[') {
6729 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6730 xmlHaltParser(ctxt);
6731 goto error;
6732 }
6733 if (ctxt->input->id != id) {
6734 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6735 "All markup of the conditional section is"
6736 " not in the same entity\n");
6737 }
6738 NEXT;
6739
6740 if (inputIdsSize <= depth) {
6741 int *tmp;
6742
6743 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6744 tmp = (int *) xmlRealloc(inputIds,
6745 inputIdsSize * sizeof(int));
6746 if (tmp == NULL) {
6747 xmlErrMemory(ctxt, NULL);
6748 goto error;
6749 }
6750 inputIds = tmp;
6751 }
6752 inputIds[depth] = id;
6753 depth++;
6754 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6755 int state;
6756 xmlParserInputState instate;
6757 size_t ignoreDepth = 0;
6758
6759 SKIP(6);
6760 SKIP_BLANKS;
6761 if (RAW != '[') {
6762 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6763 xmlHaltParser(ctxt);
6764 goto error;
6765 }
6766 if (ctxt->input->id != id) {
6767 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6768 "All markup of the conditional section is"
6769 " not in the same entity\n");
6770 }
6771 NEXT;
6772
6773 /*
6774 * Parse up to the end of the conditional section but disable
6775 * SAX event generating DTD building in the meantime
6776 */
6777 state = ctxt->disableSAX;
6778 instate = ctxt->instate;
6779 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6780 ctxt->instate = XML_PARSER_IGNORE;
6781
6782 while (RAW != 0) {
6783 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6784 SKIP(3);
6785 ignoreDepth++;
6786 /* Check for integer overflow */
6787 if (ignoreDepth == 0) {
6788 xmlErrMemory(ctxt, NULL);
6789 goto error;
6790 }
6791 } else if ((RAW == ']') && (NXT(1) == ']') &&
6792 (NXT(2) == '>')) {
6793 if (ignoreDepth == 0)
6794 break;
6795 SKIP(3);
6796 ignoreDepth--;
6797 } else {
6798 NEXT;
6799 }
6800 }
6801
6802 ctxt->disableSAX = state;
6803 ctxt->instate = instate;
6804
6805 if (RAW == 0) {
6806 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6807 goto error;
6808 }
6809 if (ctxt->input->id != id) {
6810 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6811 "All markup of the conditional section is"
6812 " not in the same entity\n");
6813 }
6814 SKIP(3);
6815 } else {
6816 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6817 xmlHaltParser(ctxt);
6818 goto error;
6819 }
6820 } else if ((depth > 0) &&
6821 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6822 depth--;
6823 if (ctxt->input->id != inputIds[depth]) {
6824 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6825 "All markup of the conditional section is not"
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006826 " in the same entity\n");
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006827 }
6828 SKIP(3);
6829 } else {
6830 const xmlChar *check = CUR_PTR;
6831 unsigned int cons = ctxt->input->consumed;
6832
6833 xmlParseMarkupDecl(ctxt);
6834
6835 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6836 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6837 xmlHaltParser(ctxt);
6838 goto error;
6839 }
6840 }
6841
6842 if (depth == 0)
6843 break;
Owen Taylor3473f882001-02-23 17:55:21 +00006844
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006845 SKIP_BLANKS;
6846 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006847 }
6848
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006849error:
6850 xmlFree(inputIds);
Owen Taylor3473f882001-02-23 17:55:21 +00006851}
6852
6853/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006854 * xmlParseMarkupDecl:
6855 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006856 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006857 * parse Markup declarations
6858 *
6859 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6860 * NotationDecl | PI | Comment
6861 *
6862 * [ VC: Proper Declaration/PE Nesting ]
6863 * Parameter-entity replacement text must be properly nested with
6864 * markup declarations. That is to say, if either the first character
6865 * or the last character of a markup declaration (markupdecl above) is
6866 * contained in the replacement text for a parameter-entity reference,
6867 * both must be contained in the same replacement text.
6868 *
6869 * [ WFC: PEs in Internal Subset ]
6870 * In the internal DTD subset, parameter-entity references can occur
6871 * only where markup declarations can occur, not within markup declarations.
6872 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006873 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006874 */
6875void
6876xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6877 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006878 if (CUR == '<') {
6879 if (NXT(1) == '!') {
6880 switch (NXT(2)) {
6881 case 'E':
6882 if (NXT(3) == 'L')
6883 xmlParseElementDecl(ctxt);
6884 else if (NXT(3) == 'N')
6885 xmlParseEntityDecl(ctxt);
6886 break;
6887 case 'A':
6888 xmlParseAttributeListDecl(ctxt);
6889 break;
6890 case 'N':
6891 xmlParseNotationDecl(ctxt);
6892 break;
6893 case '-':
6894 xmlParseComment(ctxt);
6895 break;
6896 default:
6897 /* there is an error but it will be detected later */
6898 break;
6899 }
6900 } else if (NXT(1) == '?') {
6901 xmlParsePI(ctxt);
6902 }
6903 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006904
6905 /*
6906 * detect requirement to exit there and act accordingly
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006907 * and avoid having instate overridden later on
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006908 */
6909 if (ctxt->instate == XML_PARSER_EOF)
6910 return;
6911
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006912 ctxt->instate = XML_PARSER_DTD;
6913}
6914
6915/**
6916 * xmlParseTextDecl:
6917 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006918 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006919 * parse an XML declaration header for external entities
6920 *
6921 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006922 */
6923
6924void
6925xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6926 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006927 const xmlChar *encoding;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006928 int oldstate;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006929
6930 /*
6931 * We know that '<?xml' is here.
6932 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006933 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006934 SKIP(5);
6935 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006936 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006937 return;
6938 }
6939
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006940 /* Avoid expansion of parameter entities when skipping blanks. */
6941 oldstate = ctxt->instate;
6942 ctxt->instate = XML_PARSER_START;
6943
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006944 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006945 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6946 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006947 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006948
6949 /*
6950 * We may have the VersionInfo here.
6951 */
6952 version = xmlParseVersionInfo(ctxt);
6953 if (version == NULL)
6954 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006955 else {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006956 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006957 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6958 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006959 }
6960 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006961 ctxt->input->version = version;
6962
6963 /*
6964 * We must have the encoding declaration
6965 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006966 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006967 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6968 /*
6969 * The XML REC instructs us to stop parsing right here
6970 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006971 ctxt->instate = oldstate;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006972 return;
6973 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006974 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6975 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6976 "Missing encoding in text declaration\n");
6977 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006978
6979 SKIP_BLANKS;
6980 if ((RAW == '?') && (NXT(1) == '>')) {
6981 SKIP(2);
6982 } else if (RAW == '>') {
6983 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006984 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006985 NEXT;
6986 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006987 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006988 MOVETO_ENDTAG(CUR_PTR);
6989 NEXT;
6990 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006991
6992 ctxt->instate = oldstate;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006993}
6994
6995/**
Owen Taylor3473f882001-02-23 17:55:21 +00006996 * xmlParseExternalSubset:
6997 * @ctxt: an XML parser context
6998 * @ExternalID: the external identifier
6999 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007000 *
Owen Taylor3473f882001-02-23 17:55:21 +00007001 * parse Markup declarations from an external subset
7002 *
7003 * [30] extSubset ::= textDecl? extSubsetDecl
7004 *
7005 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7006 */
7007void
7008xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7009 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00007010 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007011 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007012
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007013 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007014 (ctxt->input->end - ctxt->input->cur >= 4)) {
7015 xmlChar start[4];
7016 xmlCharEncoding enc;
7017
7018 start[0] = RAW;
7019 start[1] = NXT(1);
7020 start[2] = NXT(2);
7021 start[3] = NXT(3);
7022 enc = xmlDetectCharEncoding(start, 4);
7023 if (enc != XML_CHAR_ENCODING_NONE)
7024 xmlSwitchEncoding(ctxt, enc);
7025 }
7026
Daniel Veillarda07050d2003-10-19 14:46:32 +00007027 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007028 xmlParseTextDecl(ctxt);
7029 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7030 /*
7031 * The XML REC instructs us to stop parsing right here
7032 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08007033 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007034 return;
7035 }
7036 }
7037 if (ctxt->myDoc == NULL) {
7038 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007039 if (ctxt->myDoc == NULL) {
7040 xmlErrMemory(ctxt, "New Doc failed");
7041 return;
7042 }
7043 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007044 }
7045 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7046 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7047
7048 ctxt->instate = XML_PARSER_DTD;
7049 ctxt->external = 1;
Nick Wellnhofer453dff12017-06-19 17:55:20 +02007050 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00007051 while (((RAW == '<') && (NXT(1) == '?')) ||
7052 ((RAW == '<') && (NXT(1) == '!')) ||
Nick Wellnhofer453dff12017-06-19 17:55:20 +02007053 (RAW == '%')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007054 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007055 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007056
7057 GROW;
7058 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7059 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007060 } else
7061 xmlParseMarkupDecl(ctxt);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02007062 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00007063
Daniel Veillardfdc91562002-07-01 21:52:03 +00007064 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007065 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007066 break;
7067 }
7068 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007069
Owen Taylor3473f882001-02-23 17:55:21 +00007070 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007071 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007072 }
7073
7074}
7075
7076/**
7077 * xmlParseReference:
7078 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007079 *
Owen Taylor3473f882001-02-23 17:55:21 +00007080 * parse and handle entity references in content, depending on the SAX
7081 * interface, this may end-up in a call to character() if this is a
7082 * CharRef, a predefined entity, if there is no reference() callback.
7083 * or if the parser was asked to switch to that mode.
7084 *
7085 * [67] Reference ::= EntityRef | CharRef
7086 */
7087void
7088xmlParseReference(xmlParserCtxtPtr ctxt) {
7089 xmlEntityPtr ent;
7090 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007091 int was_checked;
7092 xmlNodePtr list = NULL;
7093 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007094
Daniel Veillard0161e632008-08-28 15:36:32 +00007095
7096 if (RAW != '&')
7097 return;
7098
7099 /*
7100 * Simple case of a CharRef
7101 */
Owen Taylor3473f882001-02-23 17:55:21 +00007102 if (NXT(1) == '#') {
7103 int i = 0;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007104 xmlChar out[16];
Owen Taylor3473f882001-02-23 17:55:21 +00007105 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007106 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007107
Daniel Veillarddc171602008-03-26 17:41:38 +00007108 if (value == 0)
7109 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007110 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7111 /*
7112 * So we are using non-UTF-8 buffers
7113 * Check that the char fit on 8bits, if not
7114 * generate a CharRef.
7115 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007116 if (value <= 0xFF) {
7117 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007118 out[1] = 0;
7119 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7120 (!ctxt->disableSAX))
7121 ctxt->sax->characters(ctxt->userData, out, 1);
7122 } else {
7123 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007124 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007125 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007126 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007127 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7128 (!ctxt->disableSAX))
7129 ctxt->sax->reference(ctxt->userData, out);
7130 }
7131 } else {
7132 /*
7133 * Just encode the value in UTF-8
7134 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007135 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007136 out[i] = 0;
7137 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7138 (!ctxt->disableSAX))
7139 ctxt->sax->characters(ctxt->userData, out, i);
7140 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007141 return;
7142 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007143
Daniel Veillard0161e632008-08-28 15:36:32 +00007144 /*
7145 * We are seeing an entity reference
7146 */
7147 ent = xmlParseEntityRef(ctxt);
7148 if (ent == NULL) return;
7149 if (!ctxt->wellFormed)
7150 return;
7151 was_checked = ent->checked;
7152
7153 /* special case of predefined entities */
7154 if ((ent->name == NULL) ||
7155 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7156 val = ent->content;
7157 if (val == NULL) return;
7158 /*
7159 * inline the entity.
7160 */
7161 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7162 (!ctxt->disableSAX))
7163 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7164 return;
7165 }
7166
7167 /*
7168 * The first reference to the entity trigger a parsing phase
7169 * where the ent->children is filled with the result from
7170 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007171 * Note: external parsed entities will not be loaded, it is not
7172 * required for a non-validating parser, unless the parsing option
7173 * of validating, or substituting entities were given. Doing so is
7174 * far more secure as the parser will only process data coming from
7175 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007176 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007177 if (((ent->checked == 0) ||
7178 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007179 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7180 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007181 unsigned long oldnbent = ctxt->nbentities, diff;
Daniel Veillard0161e632008-08-28 15:36:32 +00007182
7183 /*
7184 * This is a bit hackish but this seems the best
7185 * way to make sure both SAX and DOM entity support
7186 * behaves okay.
7187 */
7188 void *user_data;
7189 if (ctxt->userData == ctxt)
7190 user_data = NULL;
7191 else
7192 user_data = ctxt->userData;
7193
7194 /*
7195 * Check that this entity is well formed
7196 * 4.3.2: An internal general parsed entity is well-formed
7197 * if its replacement text matches the production labeled
7198 * content.
7199 */
7200 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7201 ctxt->depth++;
7202 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7203 user_data, &list);
7204 ctxt->depth--;
7205
7206 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7207 ctxt->depth++;
7208 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7209 user_data, ctxt->depth, ent->URI,
7210 ent->ExternalID, &list);
7211 ctxt->depth--;
7212 } else {
7213 ret = XML_ERR_ENTITY_PE_INTERNAL;
7214 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7215 "invalid entity type found\n", NULL);
7216 }
7217
7218 /*
7219 * Store the number of entities needing parsing for this entity
7220 * content and do checkings
7221 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007222 diff = ctxt->nbentities - oldnbent + 1;
7223 if (diff > INT_MAX / 2)
7224 diff = INT_MAX / 2;
7225 ent->checked = diff * 2;
Daniel Veillardcff25462013-03-11 15:57:55 +08007226 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7227 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007228 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007229 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Haibo Huangd75f3892021-01-05 21:34:50 -08007230 xmlHaltParser(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007231 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007232 return;
7233 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007234 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007235 xmlFreeNodeList(list);
7236 return;
7237 }
Owen Taylor3473f882001-02-23 17:55:21 +00007238
Daniel Veillard0161e632008-08-28 15:36:32 +00007239 if ((ret == XML_ERR_OK) && (list != NULL)) {
7240 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7241 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7242 (ent->children == NULL)) {
7243 ent->children = list;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007244 /*
7245 * Prune it directly in the generated document
7246 * except for single text nodes.
7247 */
7248 if ((ctxt->replaceEntities == 0) ||
7249 (ctxt->parseMode == XML_PARSE_READER) ||
7250 ((list->type == XML_TEXT_NODE) &&
7251 (list->next == NULL))) {
7252 ent->owner = 1;
7253 while (list != NULL) {
7254 list->parent = (xmlNodePtr) ent;
7255 xmlSetTreeDoc(list, ent->doc);
7256 if (list->next == NULL)
7257 ent->last = list;
7258 list = list->next;
7259 }
7260 list = NULL;
7261 } else {
7262 ent->owner = 0;
7263 while (list != NULL) {
7264 list->parent = (xmlNodePtr) ctxt->node;
7265 list->doc = ctxt->myDoc;
7266 if (list->next == NULL)
7267 ent->last = list;
7268 list = list->next;
7269 }
7270 list = ent->children;
Daniel Veillard0161e632008-08-28 15:36:32 +00007271#ifdef LIBXML_LEGACY_ENABLED
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007272 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7273 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007274#endif /* LIBXML_LEGACY_ENABLED */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007275 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007276 } else {
7277 xmlFreeNodeList(list);
7278 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007279 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007280 } else if ((ret != XML_ERR_OK) &&
7281 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7282 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7283 "Entity '%s' failed to parse\n", ent->name);
Nick Wellnhofer60dded12018-01-22 15:04:58 +01007284 if (ent->content != NULL)
7285 ent->content[0] = 0;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007286 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007287 } else if (list != NULL) {
7288 xmlFreeNodeList(list);
7289 list = NULL;
7290 }
7291 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007292 ent->checked = 2;
David Kilzer3f0627a2017-06-16 21:30:42 +02007293
7294 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7295 was_checked = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007296 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007297 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007298 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007299
Daniel Veillard0161e632008-08-28 15:36:32 +00007300 /*
7301 * Now that the entity content has been gathered
7302 * provide it to the application, this can take different forms based
7303 * on the parsing modes.
7304 */
7305 if (ent->children == NULL) {
7306 /*
7307 * Probably running in SAX mode and the callbacks don't
7308 * build the entity content. So unless we already went
7309 * though parsing for first checking go though the entity
7310 * content to generate callbacks associated to the entity
7311 */
7312 if (was_checked != 0) {
7313 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007314 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007315 * This is a bit hackish but this seems the best
7316 * way to make sure both SAX and DOM entity support
7317 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007318 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007319 if (ctxt->userData == ctxt)
7320 user_data = NULL;
7321 else
7322 user_data = ctxt->userData;
7323
7324 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7325 ctxt->depth++;
7326 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7327 ent->content, user_data, NULL);
7328 ctxt->depth--;
7329 } else if (ent->etype ==
7330 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7331 ctxt->depth++;
7332 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7333 ctxt->sax, user_data, ctxt->depth,
7334 ent->URI, ent->ExternalID, NULL);
7335 ctxt->depth--;
7336 } else {
7337 ret = XML_ERR_ENTITY_PE_INTERNAL;
7338 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7339 "invalid entity type found\n", NULL);
7340 }
7341 if (ret == XML_ERR_ENTITY_LOOP) {
7342 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7343 return;
7344 }
7345 }
7346 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7347 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7348 /*
7349 * Entity reference callback comes second, it's somewhat
7350 * superfluous but a compatibility to historical behaviour
7351 */
7352 ctxt->sax->reference(ctxt->userData, ent->name);
7353 }
7354 return;
7355 }
7356
7357 /*
7358 * If we didn't get any children for the entity being built
7359 */
7360 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7361 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7362 /*
7363 * Create a node.
7364 */
7365 ctxt->sax->reference(ctxt->userData, ent->name);
7366 return;
7367 }
7368
7369 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7370 /*
7371 * There is a problem on the handling of _private for entities
7372 * (bug 155816): Should we copy the content of the field from
7373 * the entity (possibly overwriting some value set by the user
7374 * when a copy is created), should we leave it alone, or should
7375 * we try to take care of different situations? The problem
7376 * is exacerbated by the usage of this field by the xmlReader.
7377 * To fix this bug, we look at _private on the created node
7378 * and, if it's NULL, we copy in whatever was in the entity.
7379 * If it's not NULL we leave it alone. This is somewhat of a
7380 * hack - maybe we should have further tests to determine
7381 * what to do.
7382 */
7383 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7384 /*
7385 * Seems we are generating the DOM content, do
7386 * a simple tree copy for all references except the first
7387 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007388 */
7389 if (((list == NULL) && (ent->owner == 0)) ||
7390 (ctxt->parseMode == XML_PARSE_READER)) {
7391 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7392
7393 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007394 * We are copying here, make sure there is no abuse
7395 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007396 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007397 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7398 return;
7399
7400 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007401 * when operating on a reader, the entities definitions
7402 * are always owning the entities subtree.
7403 if (ctxt->parseMode == XML_PARSE_READER)
7404 ent->owner = 1;
7405 */
7406
7407 cur = ent->children;
7408 while (cur != NULL) {
7409 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7410 if (nw != NULL) {
7411 if (nw->_private == NULL)
7412 nw->_private = cur->_private;
7413 if (firstChild == NULL){
7414 firstChild = nw;
7415 }
7416 nw = xmlAddChild(ctxt->node, nw);
7417 }
7418 if (cur == ent->last) {
7419 /*
7420 * needed to detect some strange empty
7421 * node cases in the reader tests
7422 */
7423 if ((ctxt->parseMode == XML_PARSE_READER) &&
7424 (nw != NULL) &&
7425 (nw->type == XML_ELEMENT_NODE) &&
7426 (nw->children == NULL))
7427 nw->extra = 1;
7428
7429 break;
7430 }
7431 cur = cur->next;
7432 }
7433#ifdef LIBXML_LEGACY_ENABLED
7434 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7435 xmlAddEntityReference(ent, firstChild, nw);
7436#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007437 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007438 xmlNodePtr nw = NULL, cur, next, last,
7439 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007440
7441 /*
7442 * We are copying here, make sure there is no abuse
7443 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007444 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007445 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7446 return;
7447
Daniel Veillard0161e632008-08-28 15:36:32 +00007448 /*
7449 * Copy the entity child list and make it the new
7450 * entity child list. The goal is to make sure any
7451 * ID or REF referenced will be the one from the
7452 * document content and not the entity copy.
7453 */
7454 cur = ent->children;
7455 ent->children = NULL;
7456 last = ent->last;
7457 ent->last = NULL;
7458 while (cur != NULL) {
7459 next = cur->next;
7460 cur->next = NULL;
7461 cur->parent = NULL;
7462 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7463 if (nw != NULL) {
7464 if (nw->_private == NULL)
7465 nw->_private = cur->_private;
7466 if (firstChild == NULL){
7467 firstChild = cur;
7468 }
7469 xmlAddChild((xmlNodePtr) ent, nw);
7470 xmlAddChild(ctxt->node, cur);
7471 }
7472 if (cur == last)
7473 break;
7474 cur = next;
7475 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007476 if (ent->owner == 0)
7477 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007478#ifdef LIBXML_LEGACY_ENABLED
7479 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7480 xmlAddEntityReference(ent, firstChild, nw);
7481#endif /* LIBXML_LEGACY_ENABLED */
7482 } else {
7483 const xmlChar *nbktext;
7484
7485 /*
7486 * the name change is to avoid coalescing of the
7487 * node with a possible previous text one which
7488 * would make ent->children a dangling pointer
7489 */
7490 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7491 -1);
7492 if (ent->children->type == XML_TEXT_NODE)
7493 ent->children->name = nbktext;
7494 if ((ent->last != ent->children) &&
7495 (ent->last->type == XML_TEXT_NODE))
7496 ent->last->name = nbktext;
7497 xmlAddChildList(ctxt->node, ent->children);
7498 }
7499
7500 /*
7501 * This is to avoid a nasty side effect, see
7502 * characters() in SAX.c
7503 */
7504 ctxt->nodemem = 0;
7505 ctxt->nodelen = 0;
7506 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007507 }
7508 }
7509}
7510
7511/**
7512 * xmlParseEntityRef:
7513 * @ctxt: an XML parser context
7514 *
7515 * parse ENTITY references declarations
7516 *
7517 * [68] EntityRef ::= '&' Name ';'
7518 *
7519 * [ WFC: Entity Declared ]
7520 * In a document without any DTD, a document with only an internal DTD
7521 * subset which contains no parameter entity references, or a document
7522 * with "standalone='yes'", the Name given in the entity reference
7523 * must match that in an entity declaration, except that well-formed
7524 * documents need not declare any of the following entities: amp, lt,
7525 * gt, apos, quot. The declaration of a parameter entity must precede
7526 * any reference to it. Similarly, the declaration of a general entity
7527 * must precede any reference to it which appears in a default value in an
7528 * attribute-list declaration. Note that if entities are declared in the
7529 * external subset or in external parameter entities, a non-validating
7530 * processor is not obligated to read and process their declarations;
7531 * for such documents, the rule that an entity must be declared is a
7532 * well-formedness constraint only if standalone='yes'.
7533 *
7534 * [ WFC: Parsed Entity ]
7535 * An entity reference must not contain the name of an unparsed entity
7536 *
7537 * Returns the xmlEntityPtr if found, or NULL otherwise.
7538 */
7539xmlEntityPtr
7540xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007541 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007542 xmlEntityPtr ent = NULL;
7543
7544 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007545 if (ctxt->instate == XML_PARSER_EOF)
7546 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007547
Daniel Veillard0161e632008-08-28 15:36:32 +00007548 if (RAW != '&')
7549 return(NULL);
7550 NEXT;
7551 name = xmlParseName(ctxt);
7552 if (name == NULL) {
7553 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7554 "xmlParseEntityRef: no name\n");
7555 return(NULL);
7556 }
7557 if (RAW != ';') {
7558 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7559 return(NULL);
7560 }
7561 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007562
Daniel Veillard0161e632008-08-28 15:36:32 +00007563 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007564 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007565 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007566 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7567 ent = xmlGetPredefinedEntity(name);
7568 if (ent != NULL)
7569 return(ent);
7570 }
Owen Taylor3473f882001-02-23 17:55:21 +00007571
Daniel Veillard0161e632008-08-28 15:36:32 +00007572 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007573 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007574 */
7575 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007576
Daniel Veillard0161e632008-08-28 15:36:32 +00007577 /*
7578 * Ask first SAX for entity resolution, otherwise try the
7579 * entities which may have stored in the parser context.
7580 */
7581 if (ctxt->sax != NULL) {
7582 if (ctxt->sax->getEntity != NULL)
7583 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007584 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007585 (ctxt->options & XML_PARSE_OLDSAX))
7586 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007587 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7588 (ctxt->userData==ctxt)) {
7589 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007590 }
7591 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007592 if (ctxt->instate == XML_PARSER_EOF)
7593 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007594 /*
7595 * [ WFC: Entity Declared ]
7596 * In a document without any DTD, a document with only an
7597 * internal DTD subset which contains no parameter entity
7598 * references, or a document with "standalone='yes'", the
7599 * Name given in the entity reference must match that in an
7600 * entity declaration, except that well-formed documents
7601 * need not declare any of the following entities: amp, lt,
7602 * gt, apos, quot.
7603 * The declaration of a parameter entity must precede any
7604 * reference to it.
7605 * Similarly, the declaration of a general entity must
7606 * precede any reference to it which appears in a default
7607 * value in an attribute-list declaration. Note that if
7608 * entities are declared in the external subset or in
7609 * external parameter entities, a non-validating processor
7610 * is not obligated to read and process their declarations;
7611 * for such documents, the rule that an entity must be
7612 * declared is a well-formedness constraint only if
7613 * standalone='yes'.
7614 */
7615 if (ent == NULL) {
7616 if ((ctxt->standalone == 1) ||
7617 ((ctxt->hasExternalSubset == 0) &&
7618 (ctxt->hasPErefs == 0))) {
7619 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7620 "Entity '%s' not defined\n", name);
7621 } else {
7622 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7623 "Entity '%s' not defined\n", name);
7624 if ((ctxt->inSubset == 0) &&
7625 (ctxt->sax != NULL) &&
7626 (ctxt->sax->reference != NULL)) {
7627 ctxt->sax->reference(ctxt->userData, name);
7628 }
7629 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007630 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007631 ctxt->valid = 0;
7632 }
7633
7634 /*
7635 * [ WFC: Parsed Entity ]
7636 * An entity reference must not contain the name of an
7637 * unparsed entity
7638 */
7639 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7640 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7641 "Entity reference to unparsed entity %s\n", name);
7642 }
7643
7644 /*
7645 * [ WFC: No External Entity References ]
7646 * Attribute values cannot contain direct or indirect
7647 * entity references to external entities.
7648 */
7649 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7650 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7651 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7652 "Attribute references external entity '%s'\n", name);
7653 }
7654 /*
7655 * [ WFC: No < in Attribute Values ]
7656 * The replacement text of any entity referred to directly or
7657 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007658 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007659 */
7660 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007661 (ent != NULL) &&
7662 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007663 if (((ent->checked & 1) || (ent->checked == 0)) &&
7664 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007665 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7666 "'<' in entity '%s' is not allowed in attributes values\n", name);
7667 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007668 }
7669
7670 /*
7671 * Internal check, no parameter entities here ...
7672 */
7673 else {
7674 switch (ent->etype) {
7675 case XML_INTERNAL_PARAMETER_ENTITY:
7676 case XML_EXTERNAL_PARAMETER_ENTITY:
7677 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7678 "Attempt to reference the parameter entity '%s'\n",
7679 name);
7680 break;
7681 default:
7682 break;
7683 }
7684 }
7685
7686 /*
7687 * [ WFC: No Recursion ]
7688 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007689 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007690 * Done somewhere else
7691 */
Owen Taylor3473f882001-02-23 17:55:21 +00007692 return(ent);
7693}
7694
7695/**
7696 * xmlParseStringEntityRef:
7697 * @ctxt: an XML parser context
7698 * @str: a pointer to an index in the string
7699 *
7700 * parse ENTITY references declarations, but this version parses it from
7701 * a string value.
7702 *
7703 * [68] EntityRef ::= '&' Name ';'
7704 *
7705 * [ WFC: Entity Declared ]
7706 * In a document without any DTD, a document with only an internal DTD
7707 * subset which contains no parameter entity references, or a document
7708 * with "standalone='yes'", the Name given in the entity reference
7709 * must match that in an entity declaration, except that well-formed
7710 * documents need not declare any of the following entities: amp, lt,
7711 * gt, apos, quot. The declaration of a parameter entity must precede
7712 * any reference to it. Similarly, the declaration of a general entity
7713 * must precede any reference to it which appears in a default value in an
7714 * attribute-list declaration. Note that if entities are declared in the
7715 * external subset or in external parameter entities, a non-validating
7716 * processor is not obligated to read and process their declarations;
7717 * for such documents, the rule that an entity must be declared is a
7718 * well-formedness constraint only if standalone='yes'.
7719 *
7720 * [ WFC: Parsed Entity ]
7721 * An entity reference must not contain the name of an unparsed entity
7722 *
7723 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7724 * is updated to the current location in the string.
7725 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007726static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007727xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7728 xmlChar *name;
7729 const xmlChar *ptr;
7730 xmlChar cur;
7731 xmlEntityPtr ent = NULL;
7732
7733 if ((str == NULL) || (*str == NULL))
7734 return(NULL);
7735 ptr = *str;
7736 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007737 if (cur != '&')
7738 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007739
Daniel Veillard0161e632008-08-28 15:36:32 +00007740 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007741 name = xmlParseStringName(ctxt, &ptr);
7742 if (name == NULL) {
7743 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7744 "xmlParseStringEntityRef: no name\n");
7745 *str = ptr;
7746 return(NULL);
7747 }
7748 if (*ptr != ';') {
7749 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007750 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007751 *str = ptr;
7752 return(NULL);
7753 }
7754 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007755
Owen Taylor3473f882001-02-23 17:55:21 +00007756
Daniel Veillard0161e632008-08-28 15:36:32 +00007757 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007758 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007759 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007760 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7761 ent = xmlGetPredefinedEntity(name);
7762 if (ent != NULL) {
7763 xmlFree(name);
7764 *str = ptr;
7765 return(ent);
7766 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007767 }
Owen Taylor3473f882001-02-23 17:55:21 +00007768
Daniel Veillard0161e632008-08-28 15:36:32 +00007769 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007770 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007771 */
7772 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007773
Daniel Veillard0161e632008-08-28 15:36:32 +00007774 /*
7775 * Ask first SAX for entity resolution, otherwise try the
7776 * entities which may have stored in the parser context.
7777 */
7778 if (ctxt->sax != NULL) {
7779 if (ctxt->sax->getEntity != NULL)
7780 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007781 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7782 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007783 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7784 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007785 }
7786 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007787 if (ctxt->instate == XML_PARSER_EOF) {
7788 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007789 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007790 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007791
7792 /*
7793 * [ WFC: Entity Declared ]
7794 * In a document without any DTD, a document with only an
7795 * internal DTD subset which contains no parameter entity
7796 * references, or a document with "standalone='yes'", the
7797 * Name given in the entity reference must match that in an
7798 * entity declaration, except that well-formed documents
7799 * need not declare any of the following entities: amp, lt,
7800 * gt, apos, quot.
7801 * The declaration of a parameter entity must precede any
7802 * reference to it.
7803 * Similarly, the declaration of a general entity must
7804 * precede any reference to it which appears in a default
7805 * value in an attribute-list declaration. Note that if
7806 * entities are declared in the external subset or in
7807 * external parameter entities, a non-validating processor
7808 * is not obligated to read and process their declarations;
7809 * for such documents, the rule that an entity must be
7810 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007811 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007812 */
7813 if (ent == NULL) {
7814 if ((ctxt->standalone == 1) ||
7815 ((ctxt->hasExternalSubset == 0) &&
7816 (ctxt->hasPErefs == 0))) {
7817 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7818 "Entity '%s' not defined\n", name);
7819 } else {
7820 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7821 "Entity '%s' not defined\n",
7822 name);
7823 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007824 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007825 /* TODO ? check regressions ctxt->valid = 0; */
7826 }
7827
7828 /*
7829 * [ WFC: Parsed Entity ]
7830 * An entity reference must not contain the name of an
7831 * unparsed entity
7832 */
7833 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7834 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7835 "Entity reference to unparsed entity %s\n", name);
7836 }
7837
7838 /*
7839 * [ WFC: No External Entity References ]
7840 * Attribute values cannot contain direct or indirect
7841 * entity references to external entities.
7842 */
7843 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7844 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7845 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7846 "Attribute references external entity '%s'\n", name);
7847 }
7848 /*
7849 * [ WFC: No < in Attribute Values ]
7850 * The replacement text of any entity referred to directly or
7851 * indirectly in an attribute value (other than "&lt;") must
7852 * not contain a <.
7853 */
7854 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7855 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007856 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007857 (xmlStrchr(ent->content, '<'))) {
7858 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7859 "'<' in entity '%s' is not allowed in attributes values\n",
7860 name);
7861 }
7862
7863 /*
7864 * Internal check, no parameter entities here ...
7865 */
7866 else {
7867 switch (ent->etype) {
7868 case XML_INTERNAL_PARAMETER_ENTITY:
7869 case XML_EXTERNAL_PARAMETER_ENTITY:
7870 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7871 "Attempt to reference the parameter entity '%s'\n",
7872 name);
7873 break;
7874 default:
7875 break;
7876 }
7877 }
7878
7879 /*
7880 * [ WFC: No Recursion ]
7881 * A parsed entity must not contain a recursive reference
7882 * to itself, either directly or indirectly.
7883 * Done somewhere else
7884 */
7885
7886 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007887 *str = ptr;
7888 return(ent);
7889}
7890
7891/**
7892 * xmlParsePEReference:
7893 * @ctxt: an XML parser context
7894 *
7895 * parse PEReference declarations
7896 * The entity content is handled directly by pushing it's content as
7897 * a new input stream.
7898 *
7899 * [69] PEReference ::= '%' Name ';'
7900 *
7901 * [ WFC: No Recursion ]
7902 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007903 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007904 *
7905 * [ WFC: Entity Declared ]
7906 * In a document without any DTD, a document with only an internal DTD
7907 * subset which contains no parameter entity references, or a document
7908 * with "standalone='yes'", ... ... The declaration of a parameter
7909 * entity must precede any reference to it...
7910 *
7911 * [ VC: Entity Declared ]
7912 * In a document with an external subset or external parameter entities
7913 * with "standalone='no'", ... ... The declaration of a parameter entity
7914 * must precede any reference to it...
7915 *
7916 * [ WFC: In DTD ]
7917 * Parameter-entity references may only appear in the DTD.
7918 * NOTE: misleading but this is handled.
7919 */
7920void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007921xmlParsePEReference(xmlParserCtxtPtr ctxt)
7922{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007923 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007924 xmlEntityPtr entity = NULL;
7925 xmlParserInputPtr input;
7926
Daniel Veillard0161e632008-08-28 15:36:32 +00007927 if (RAW != '%')
7928 return;
7929 NEXT;
7930 name = xmlParseName(ctxt);
7931 if (name == NULL) {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007932 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
Daniel Veillard0161e632008-08-28 15:36:32 +00007933 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007934 }
Nick Wellnhofer03904152017-06-05 21:16:00 +02007935 if (xmlParserDebugEntities)
7936 xmlGenericError(xmlGenericErrorContext,
7937 "PEReference: %s\n", name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007938 if (RAW != ';') {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007939 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007940 return;
7941 }
7942
7943 NEXT;
7944
7945 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007946 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007947 */
7948 ctxt->nbentities++;
7949
7950 /*
7951 * Request the entity from SAX
7952 */
7953 if ((ctxt->sax != NULL) &&
7954 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007955 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7956 if (ctxt->instate == XML_PARSER_EOF)
7957 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007958 if (entity == NULL) {
7959 /*
7960 * [ WFC: Entity Declared ]
7961 * In a document without any DTD, a document with only an
7962 * internal DTD subset which contains no parameter entity
7963 * references, or a document with "standalone='yes'", ...
7964 * ... The declaration of a parameter entity must precede
7965 * any reference to it...
7966 */
7967 if ((ctxt->standalone == 1) ||
7968 ((ctxt->hasExternalSubset == 0) &&
7969 (ctxt->hasPErefs == 0))) {
7970 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7971 "PEReference: %%%s; not found\n",
7972 name);
7973 } else {
7974 /*
7975 * [ VC: Entity Declared ]
7976 * In a document with an external subset or external
7977 * parameter entities with "standalone='no'", ...
7978 * ... The declaration of a parameter entity must
7979 * precede any reference to it...
7980 */
Nick Wellnhofer03904152017-06-05 21:16:00 +02007981 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7982 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7983 "PEReference: %%%s; not found\n",
7984 name, NULL);
7985 } else
7986 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7987 "PEReference: %%%s; not found\n",
7988 name, NULL);
7989 ctxt->valid = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007990 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007991 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007992 } else {
7993 /*
7994 * Internal checking in case the entity quest barfed
7995 */
7996 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7997 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7998 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7999 "Internal: %%%s; is not a parameter entity\n",
8000 name, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00008001 } else {
Nick Wellnhofer03904152017-06-05 21:16:00 +02008002 xmlChar start[4];
8003 xmlCharEncoding enc;
8004
Elliott Hughese54f00d2021-05-13 08:13:46 -07008005 if (xmlParserEntityCheck(ctxt, 0, entity, 0))
8006 return;
8007
Neel Mehta90ccb582017-04-07 17:43:02 +02008008 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8009 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8010 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8011 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8012 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8013 (ctxt->replaceEntities == 0) &&
8014 (ctxt->validate == 0))
8015 return;
8016
Daniel Veillard0161e632008-08-28 15:36:32 +00008017 input = xmlNewEntityInputStream(ctxt, entity);
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02008018 if (xmlPushInput(ctxt, input) < 0) {
8019 xmlFreeInputStream(input);
Daniel Veillard0161e632008-08-28 15:36:32 +00008020 return;
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02008021 }
Nick Wellnhofer46dc9892017-06-08 02:24:56 +02008022
8023 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8024 /*
8025 * Get the 4 first bytes and decode the charset
8026 * if enc != XML_CHAR_ENCODING_NONE
8027 * plug some encoding conversion routines.
8028 * Note that, since we may have some non-UTF8
8029 * encoding (like UTF16, bug 135229), the 'length'
8030 * is not known, but we can calculate based upon
8031 * the amount of data in the buffer.
8032 */
8033 GROW
8034 if (ctxt->instate == XML_PARSER_EOF)
8035 return;
8036 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8037 start[0] = RAW;
8038 start[1] = NXT(1);
8039 start[2] = NXT(2);
8040 start[3] = NXT(3);
8041 enc = xmlDetectCharEncoding(start, 4);
8042 if (enc != XML_CHAR_ENCODING_NONE) {
8043 xmlSwitchEncoding(ctxt, enc);
8044 }
8045 }
8046
8047 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8048 (IS_BLANK_CH(NXT(5)))) {
8049 xmlParseTextDecl(ctxt);
Nick Wellnhofer03904152017-06-05 21:16:00 +02008050 }
8051 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008052 }
8053 }
8054 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008055}
8056
8057/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008058 * xmlLoadEntityContent:
8059 * @ctxt: an XML parser context
8060 * @entity: an unloaded system entity
8061 *
8062 * Load the original content of the given system entity from the
8063 * ExternalID/SystemID given. This is to be used for Included in Literal
8064 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8065 *
8066 * Returns 0 in case of success and -1 in case of failure
8067 */
8068static int
8069xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8070 xmlParserInputPtr input;
8071 xmlBufferPtr buf;
8072 int l, c;
8073 int count = 0;
8074
8075 if ((ctxt == NULL) || (entity == NULL) ||
8076 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8077 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8078 (entity->content != NULL)) {
8079 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8080 "xmlLoadEntityContent parameter error");
8081 return(-1);
8082 }
8083
8084 if (xmlParserDebugEntities)
8085 xmlGenericError(xmlGenericErrorContext,
8086 "Reading %s entity content input\n", entity->name);
8087
8088 buf = xmlBufferCreate();
8089 if (buf == NULL) {
8090 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8091 "xmlLoadEntityContent parameter error");
8092 return(-1);
8093 }
8094
8095 input = xmlNewEntityInputStream(ctxt, entity);
8096 if (input == NULL) {
8097 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8098 "xmlLoadEntityContent input error");
8099 xmlBufferFree(buf);
8100 return(-1);
8101 }
8102
8103 /*
8104 * Push the entity as the current input, read char by char
8105 * saving to the buffer until the end of the entity or an error
8106 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008107 if (xmlPushInput(ctxt, input) < 0) {
8108 xmlBufferFree(buf);
8109 return(-1);
8110 }
8111
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008112 GROW;
8113 c = CUR_CHAR(l);
8114 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8115 (IS_CHAR(c))) {
8116 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008117 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008118 count = 0;
8119 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008120 if (ctxt->instate == XML_PARSER_EOF) {
8121 xmlBufferFree(buf);
8122 return(-1);
8123 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008124 }
8125 NEXTL(l);
8126 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008127 if (c == 0) {
8128 count = 0;
8129 GROW;
8130 if (ctxt->instate == XML_PARSER_EOF) {
8131 xmlBufferFree(buf);
8132 return(-1);
8133 }
8134 c = CUR_CHAR(l);
8135 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008136 }
8137
8138 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8139 xmlPopInput(ctxt);
8140 } else if (!IS_CHAR(c)) {
8141 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8142 "xmlLoadEntityContent: invalid char value %d\n",
8143 c);
8144 xmlBufferFree(buf);
8145 return(-1);
8146 }
8147 entity->content = buf->content;
8148 buf->content = NULL;
8149 xmlBufferFree(buf);
8150
8151 return(0);
8152}
8153
8154/**
Owen Taylor3473f882001-02-23 17:55:21 +00008155 * xmlParseStringPEReference:
8156 * @ctxt: an XML parser context
8157 * @str: a pointer to an index in the string
8158 *
8159 * parse PEReference declarations
8160 *
8161 * [69] PEReference ::= '%' Name ';'
8162 *
8163 * [ WFC: No Recursion ]
8164 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008165 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008166 *
8167 * [ WFC: Entity Declared ]
8168 * In a document without any DTD, a document with only an internal DTD
8169 * subset which contains no parameter entity references, or a document
8170 * with "standalone='yes'", ... ... The declaration of a parameter
8171 * entity must precede any reference to it...
8172 *
8173 * [ VC: Entity Declared ]
8174 * In a document with an external subset or external parameter entities
8175 * with "standalone='no'", ... ... The declaration of a parameter entity
8176 * must precede any reference to it...
8177 *
8178 * [ WFC: In DTD ]
8179 * Parameter-entity references may only appear in the DTD.
8180 * NOTE: misleading but this is handled.
8181 *
8182 * Returns the string of the entity content.
8183 * str is updated to the current value of the index
8184 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008185static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008186xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8187 const xmlChar *ptr;
8188 xmlChar cur;
8189 xmlChar *name;
8190 xmlEntityPtr entity = NULL;
8191
8192 if ((str == NULL) || (*str == NULL)) return(NULL);
8193 ptr = *str;
8194 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008195 if (cur != '%')
8196 return(NULL);
8197 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008198 name = xmlParseStringName(ctxt, &ptr);
8199 if (name == NULL) {
8200 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8201 "xmlParseStringPEReference: no name\n");
8202 *str = ptr;
8203 return(NULL);
8204 }
8205 cur = *ptr;
8206 if (cur != ';') {
8207 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8208 xmlFree(name);
8209 *str = ptr;
8210 return(NULL);
8211 }
8212 ptr++;
8213
8214 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008215 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00008216 */
8217 ctxt->nbentities++;
8218
8219 /*
8220 * Request the entity from SAX
8221 */
8222 if ((ctxt->sax != NULL) &&
8223 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008224 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8225 if (ctxt->instate == XML_PARSER_EOF) {
8226 xmlFree(name);
Nick Wellnhoferfb2f5182017-06-10 17:06:16 +02008227 *str = ptr;
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008228 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008229 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008230 if (entity == NULL) {
8231 /*
8232 * [ WFC: Entity Declared ]
8233 * In a document without any DTD, a document with only an
8234 * internal DTD subset which contains no parameter entity
8235 * references, or a document with "standalone='yes'", ...
8236 * ... The declaration of a parameter entity must precede
8237 * any reference to it...
8238 */
8239 if ((ctxt->standalone == 1) ||
8240 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8241 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8242 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008243 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008244 /*
8245 * [ VC: Entity Declared ]
8246 * In a document with an external subset or external
8247 * parameter entities with "standalone='no'", ...
8248 * ... The declaration of a parameter entity must
8249 * precede any reference to it...
8250 */
8251 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8252 "PEReference: %%%s; not found\n",
8253 name, NULL);
8254 ctxt->valid = 0;
8255 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008256 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008257 } else {
8258 /*
8259 * Internal checking in case the entity quest barfed
8260 */
8261 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8262 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8263 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8264 "%%%s; is not a parameter entity\n",
8265 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008266 }
8267 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008268 ctxt->hasPErefs = 1;
8269 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008270 *str = ptr;
8271 return(entity);
8272}
8273
8274/**
8275 * xmlParseDocTypeDecl:
8276 * @ctxt: an XML parser context
8277 *
8278 * parse a DOCTYPE declaration
8279 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008280 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008281 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8282 *
8283 * [ VC: Root Element Type ]
8284 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008285 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008286 */
8287
8288void
8289xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008290 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008291 xmlChar *ExternalID = NULL;
8292 xmlChar *URI = NULL;
8293
8294 /*
8295 * We know that '<!DOCTYPE' has been detected.
8296 */
8297 SKIP(9);
8298
8299 SKIP_BLANKS;
8300
8301 /*
8302 * Parse the DOCTYPE name.
8303 */
8304 name = xmlParseName(ctxt);
8305 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008306 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8307 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008308 }
8309 ctxt->intSubName = name;
8310
8311 SKIP_BLANKS;
8312
8313 /*
8314 * Check for SystemID and ExternalID
8315 */
8316 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8317
8318 if ((URI != NULL) || (ExternalID != NULL)) {
8319 ctxt->hasExternalSubset = 1;
8320 }
8321 ctxt->extSubURI = URI;
8322 ctxt->extSubSystem = ExternalID;
8323
8324 SKIP_BLANKS;
8325
8326 /*
8327 * Create and update the internal subset.
8328 */
8329 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8330 (!ctxt->disableSAX))
8331 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008332 if (ctxt->instate == XML_PARSER_EOF)
8333 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008334
8335 /*
8336 * Is there any internal subset declarations ?
8337 * they are handled separately in xmlParseInternalSubset()
8338 */
8339 if (RAW == '[')
8340 return;
8341
8342 /*
8343 * We should be at the end of the DOCTYPE declaration.
8344 */
8345 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008346 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008347 }
8348 NEXT;
8349}
8350
8351/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008352 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008353 * @ctxt: an XML parser context
8354 *
8355 * parse the internal subset declaration
8356 *
8357 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8358 */
8359
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008360static void
Owen Taylor3473f882001-02-23 17:55:21 +00008361xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8362 /*
8363 * Is there any DTD definition ?
8364 */
8365 if (RAW == '[') {
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008366 int baseInputNr = ctxt->inputNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008367 ctxt->instate = XML_PARSER_DTD;
8368 NEXT;
8369 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008370 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008371 * PEReferences.
8372 * Subsequence (markupdecl | PEReference | S)*
8373 */
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008374 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
Nick Wellnhofer453dff12017-06-19 17:55:20 +02008375 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008376 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008377 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008378
8379 SKIP_BLANKS;
8380 xmlParseMarkupDecl(ctxt);
8381 xmlParsePEReference(ctxt);
8382
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008383 /*
8384 * Conditional sections are allowed from external entities included
8385 * by PE References in the internal subset.
8386 */
8387 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8388 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8389 xmlParseConditionalSections(ctxt);
8390 }
8391
Owen Taylor3473f882001-02-23 17:55:21 +00008392 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008393 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008394 "xmlParseInternalSubset: error detected in Markup declaration\n");
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008395 if (ctxt->inputNr > baseInputNr)
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02008396 xmlPopInput(ctxt);
8397 else
8398 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008399 }
8400 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008401 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008402 NEXT;
8403 SKIP_BLANKS;
8404 }
8405 }
8406
8407 /*
8408 * We should be at the end of the DOCTYPE declaration.
8409 */
8410 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008411 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Daniel Veillarda7a94612016-02-09 12:55:29 +01008412 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008413 }
8414 NEXT;
8415}
8416
Daniel Veillard81273902003-09-30 00:43:48 +00008417#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008418/**
8419 * xmlParseAttribute:
8420 * @ctxt: an XML parser context
8421 * @value: a xmlChar ** used to store the value of the attribute
8422 *
8423 * parse an attribute
8424 *
8425 * [41] Attribute ::= Name Eq AttValue
8426 *
8427 * [ WFC: No External Entity References ]
8428 * Attribute values cannot contain direct or indirect entity references
8429 * to external entities.
8430 *
8431 * [ WFC: No < in Attribute Values ]
8432 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008433 * an attribute value (other than "&lt;") must not contain a <.
8434 *
Owen Taylor3473f882001-02-23 17:55:21 +00008435 * [ VC: Attribute Value Type ]
8436 * The attribute must have been declared; the value must be of the type
8437 * declared for it.
8438 *
8439 * [25] Eq ::= S? '=' S?
8440 *
8441 * With namespace:
8442 *
8443 * [NS 11] Attribute ::= QName Eq AttValue
8444 *
8445 * Also the case QName == xmlns:??? is handled independently as a namespace
8446 * definition.
8447 *
8448 * Returns the attribute name, and the value in *value.
8449 */
8450
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008451const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008452xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008453 const xmlChar *name;
8454 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008455
8456 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008457 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008458 name = xmlParseName(ctxt);
8459 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008460 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008461 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008462 return(NULL);
8463 }
8464
8465 /*
8466 * read the value
8467 */
8468 SKIP_BLANKS;
8469 if (RAW == '=') {
8470 NEXT;
8471 SKIP_BLANKS;
8472 val = xmlParseAttValue(ctxt);
8473 ctxt->instate = XML_PARSER_CONTENT;
8474 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008475 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02008476 "Specification mandates value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008477 return(NULL);
8478 }
8479
8480 /*
8481 * Check that xml:lang conforms to the specification
8482 * No more registered as an error, just generate a warning now
8483 * since this was deprecated in XML second edition
8484 */
8485 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8486 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008487 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8488 "Malformed value for xml:lang : %s\n",
8489 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008490 }
8491 }
8492
8493 /*
8494 * Check that xml:space conforms to the specification
8495 */
8496 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8497 if (xmlStrEqual(val, BAD_CAST "default"))
8498 *(ctxt->space) = 0;
8499 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8500 *(ctxt->space) = 1;
8501 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008502 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008503"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008504 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008505 }
8506 }
8507
8508 *value = val;
8509 return(name);
8510}
8511
8512/**
8513 * xmlParseStartTag:
8514 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008515 *
Owen Taylor3473f882001-02-23 17:55:21 +00008516 * parse a start of tag either for rule element or
8517 * EmptyElement. In both case we don't parse the tag closing chars.
8518 *
8519 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8520 *
8521 * [ WFC: Unique Att Spec ]
8522 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008523 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008524 *
8525 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8526 *
8527 * [ WFC: Unique Att Spec ]
8528 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008529 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008530 *
8531 * With namespace:
8532 *
8533 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8534 *
8535 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8536 *
8537 * Returns the element name parsed
8538 */
8539
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008540const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008541xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008542 const xmlChar *name;
8543 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008544 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008545 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008546 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008547 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008548 int i;
8549
8550 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008551 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008552
8553 name = xmlParseName(ctxt);
8554 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008555 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008556 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008557 return(NULL);
8558 }
8559
8560 /*
8561 * Now parse the attributes, it ends up with the ending
8562 *
8563 * (S Attribute)* S?
8564 */
8565 SKIP_BLANKS;
8566 GROW;
8567
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008568 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008569 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008570 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008571 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008572 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008573
8574 attname = xmlParseAttribute(ctxt, &attvalue);
8575 if ((attname != NULL) && (attvalue != NULL)) {
8576 /*
8577 * [ WFC: Unique Att Spec ]
8578 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008579 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008580 */
8581 for (i = 0; i < nbatts;i += 2) {
8582 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008583 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008584 xmlFree(attvalue);
8585 goto failed;
8586 }
8587 }
Owen Taylor3473f882001-02-23 17:55:21 +00008588 /*
8589 * Add the pair to atts
8590 */
8591 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008592 maxatts = 22; /* allow for 10 attrs by default */
8593 atts = (const xmlChar **)
8594 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008595 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008596 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008597 if (attvalue != NULL)
8598 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008599 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008600 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008601 ctxt->atts = atts;
8602 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008603 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008604 const xmlChar **n;
8605
Owen Taylor3473f882001-02-23 17:55:21 +00008606 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008607 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008608 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008609 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008610 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008611 if (attvalue != NULL)
8612 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008613 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008614 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008615 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008616 ctxt->atts = atts;
8617 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008618 }
8619 atts[nbatts++] = attname;
8620 atts[nbatts++] = attvalue;
8621 atts[nbatts] = NULL;
8622 atts[nbatts + 1] = NULL;
8623 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008624 if (attvalue != NULL)
8625 xmlFree(attvalue);
8626 }
8627
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008628failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008629
Daniel Veillard3772de32002-12-17 10:31:45 +00008630 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008631 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8632 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02008633 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008634 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8635 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008636 }
Daniel Veillard02111c12003-02-24 19:14:52 +00008637 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8638 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008639 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8640 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008641 break;
8642 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008643 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008644 GROW;
8645 }
8646
8647 /*
8648 * SAX: Start of Element !
8649 */
8650 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008651 (!ctxt->disableSAX)) {
8652 if (nbatts > 0)
8653 ctxt->sax->startElement(ctxt->userData, name, atts);
8654 else
8655 ctxt->sax->startElement(ctxt->userData, name, NULL);
8656 }
Owen Taylor3473f882001-02-23 17:55:21 +00008657
8658 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008659 /* Free only the content strings */
8660 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008661 if (atts[i] != NULL)
8662 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008663 }
8664 return(name);
8665}
8666
8667/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008668 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008669 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008670 * @line: line of the start tag
8671 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008672 *
8673 * parse an end of tag
8674 *
8675 * [42] ETag ::= '</' Name S? '>'
8676 *
8677 * With namespace
8678 *
8679 * [NS 9] ETag ::= '</' QName S? '>'
8680 */
8681
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008682static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008683xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008684 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008685
8686 GROW;
8687 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008688 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008689 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008690 return;
8691 }
8692 SKIP(2);
8693
Daniel Veillard46de64e2002-05-29 08:21:33 +00008694 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008695
8696 /*
8697 * We should definitely be at the ending "S? '>'" part
8698 */
8699 GROW;
8700 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008701 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008702 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008703 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008704 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008705
8706 /*
8707 * [ WFC: Element Type Match ]
8708 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008709 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008710 *
8711 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008712 if (name != (xmlChar*)1) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008713 if (name == NULL) name = BAD_CAST "unparsable";
Daniel Veillardf403d292003-10-05 13:51:35 +00008714 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008715 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008716 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008717 }
8718
8719 /*
8720 * SAX: End of Tag
8721 */
8722 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8723 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008724 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008725
Daniel Veillarde57ec792003-09-10 10:50:59 +00008726 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008727 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008728 return;
8729}
8730
8731/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008732 * xmlParseEndTag:
8733 * @ctxt: an XML parser context
8734 *
8735 * parse an end of tag
8736 *
8737 * [42] ETag ::= '</' Name S? '>'
8738 *
8739 * With namespace
8740 *
8741 * [NS 9] ETag ::= '</' QName S? '>'
8742 */
8743
8744void
8745xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008746 xmlParseEndTag1(ctxt, 0);
8747}
Daniel Veillard81273902003-09-30 00:43:48 +00008748#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008749
8750/************************************************************************
8751 * *
8752 * SAX 2 specific operations *
8753 * *
8754 ************************************************************************/
8755
Daniel Veillard0fb18932003-09-07 09:14:37 +00008756/*
8757 * xmlGetNamespace:
8758 * @ctxt: an XML parser context
8759 * @prefix: the prefix to lookup
8760 *
8761 * Lookup the namespace name for the @prefix (which ca be NULL)
Jan Pokornýbb654fe2016-04-13 16:56:07 +02008762 * The prefix must come from the @ctxt->dict dictionary
Daniel Veillard0fb18932003-09-07 09:14:37 +00008763 *
8764 * Returns the namespace name or NULL if not bound
8765 */
8766static const xmlChar *
8767xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8768 int i;
8769
Daniel Veillarde57ec792003-09-10 10:50:59 +00008770 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008771 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008772 if (ctxt->nsTab[i] == prefix) {
8773 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8774 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008775 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008776 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008777 return(NULL);
8778}
8779
8780/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008781 * xmlParseQName:
8782 * @ctxt: an XML parser context
8783 * @prefix: pointer to store the prefix part
8784 *
8785 * parse an XML Namespace QName
8786 *
8787 * [6] QName ::= (Prefix ':')? LocalPart
8788 * [7] Prefix ::= NCName
8789 * [8] LocalPart ::= NCName
8790 *
8791 * Returns the Name parsed or NULL
8792 */
8793
8794static const xmlChar *
8795xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8796 const xmlChar *l, *p;
8797
8798 GROW;
8799
8800 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008801 if (l == NULL) {
8802 if (CUR == ':') {
8803 l = xmlParseName(ctxt);
8804 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008805 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008806 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008807 *prefix = NULL;
8808 return(l);
8809 }
8810 }
8811 return(NULL);
8812 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008813 if (CUR == ':') {
8814 NEXT;
8815 p = l;
8816 l = xmlParseNCName(ctxt);
8817 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008818 xmlChar *tmp;
8819
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008820 if (ctxt->instate == XML_PARSER_EOF)
8821 return(NULL);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008822 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8823 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008824 l = xmlParseNmtoken(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008825 if (l == NULL) {
8826 if (ctxt->instate == XML_PARSER_EOF)
8827 return(NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008828 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008829 } else {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008830 tmp = xmlBuildQName(l, p, NULL, 0);
8831 xmlFree((char *)l);
8832 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008833 p = xmlDictLookup(ctxt->dict, tmp, -1);
8834 if (tmp != NULL) xmlFree(tmp);
8835 *prefix = NULL;
8836 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008837 }
8838 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008839 xmlChar *tmp;
8840
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008841 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8842 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008843 NEXT;
8844 tmp = (xmlChar *) xmlParseName(ctxt);
8845 if (tmp != NULL) {
8846 tmp = xmlBuildQName(tmp, l, NULL, 0);
8847 l = xmlDictLookup(ctxt->dict, tmp, -1);
8848 if (tmp != NULL) xmlFree(tmp);
8849 *prefix = p;
8850 return(l);
8851 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008852 if (ctxt->instate == XML_PARSER_EOF)
8853 return(NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008854 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8855 l = xmlDictLookup(ctxt->dict, tmp, -1);
8856 if (tmp != NULL) xmlFree(tmp);
8857 *prefix = p;
8858 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008859 }
8860 *prefix = p;
8861 } else
8862 *prefix = NULL;
8863 return(l);
8864}
8865
8866/**
8867 * xmlParseQNameAndCompare:
8868 * @ctxt: an XML parser context
8869 * @name: the localname
8870 * @prefix: the prefix, if any.
8871 *
8872 * parse an XML name and compares for match
8873 * (specialized for endtag parsing)
8874 *
8875 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8876 * and the name for mismatch
8877 */
8878
8879static const xmlChar *
8880xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8881 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008882 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008883 const xmlChar *in;
8884 const xmlChar *ret;
8885 const xmlChar *prefix2;
8886
8887 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8888
8889 GROW;
8890 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008891
Daniel Veillard0fb18932003-09-07 09:14:37 +00008892 cmp = prefix;
8893 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008894 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008895 ++cmp;
8896 }
8897 if ((*cmp == 0) && (*in == ':')) {
8898 in++;
8899 cmp = name;
8900 while (*in != 0 && *in == *cmp) {
8901 ++in;
8902 ++cmp;
8903 }
William M. Brack76e95df2003-10-18 16:20:14 +00008904 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008905 /* success */
Haibo Huangf0a546b2020-09-01 20:28:19 -07008906 ctxt->input->col += in - ctxt->input->cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008907 ctxt->input->cur = in;
8908 return((const xmlChar*) 1);
8909 }
8910 }
8911 /*
8912 * all strings coms from the dictionary, equality can be done directly
8913 */
8914 ret = xmlParseQName (ctxt, &prefix2);
8915 if ((ret == name) && (prefix == prefix2))
8916 return((const xmlChar*) 1);
8917 return ret;
8918}
8919
8920/**
8921 * xmlParseAttValueInternal:
8922 * @ctxt: an XML parser context
8923 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008924 * @alloc: whether the attribute was reallocated as a new string
8925 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008926 *
8927 * parse a value for an attribute.
8928 * NOTE: if no normalization is needed, the routine will return pointers
8929 * directly from the data buffer.
8930 *
8931 * 3.3.3 Attribute-Value Normalization:
8932 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008933 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008934 * - a character reference is processed by appending the referenced
8935 * character to the attribute value
8936 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008937 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008938 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8939 * appending #x20 to the normalized value, except that only a single
8940 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008941 * parsed entity or the literal entity value of an internal parsed entity
8942 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008943 * If the declared value is not CDATA, then the XML processor must further
8944 * process the normalized attribute value by discarding any leading and
8945 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008946 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008947 * All attributes for which no declaration has been read should be treated
8948 * by a non-validating parser as if declared CDATA.
8949 *
8950 * Returns the AttValue parsed or NULL. The value has to be freed by the
8951 * caller if it was copied, this can be detected by val[*len] == 0.
8952 */
8953
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008954#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8955 const xmlChar *oldbase = ctxt->input->base;\
8956 GROW;\
8957 if (ctxt->instate == XML_PARSER_EOF)\
8958 return(NULL);\
8959 if (oldbase != ctxt->input->base) {\
8960 ptrdiff_t delta = ctxt->input->base - oldbase;\
8961 start = start + delta;\
8962 in = in + delta;\
8963 }\
8964 end = ctxt->input->end;
8965
Daniel Veillard0fb18932003-09-07 09:14:37 +00008966static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008967xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8968 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008969{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008970 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008971 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008972 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08008973 int line, col;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02008974 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8975 XML_MAX_HUGE_LENGTH :
8976 XML_MAX_TEXT_LENGTH;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008977
8978 GROW;
8979 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08008980 line = ctxt->input->line;
8981 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008982 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008983 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008984 return (NULL);
8985 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008986 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008987
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008988 /*
8989 * try to handle in this routine the most common case where no
8990 * allocation of a new string is required and where content is
8991 * pure ASCII.
8992 */
8993 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008994 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008995 end = ctxt->input->end;
8996 start = in;
8997 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008998 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Daniel Veillard0fb18932003-09-07 09:14:37 +00008999 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009000 if (normalize) {
9001 /*
9002 * Skip any leading spaces
9003 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009004 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009005 ((*in == 0x20) || (*in == 0x9) ||
9006 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009007 if (*in == 0xA) {
9008 line++; col = 1;
9009 } else {
9010 col++;
9011 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009012 in++;
9013 start = in;
9014 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009015 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02009016 if ((in - start) > maxLength) {
Daniel Veillarde17db992012-07-19 11:25:16 +08009017 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009018 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009019 return(NULL);
9020 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009021 }
9022 }
9023 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9024 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009025 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009026 if ((*in++ == 0x20) && (*in == 0x20)) break;
9027 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009028 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02009029 if ((in - start) > maxLength) {
Daniel Veillarde17db992012-07-19 11:25:16 +08009030 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009031 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009032 return(NULL);
9033 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009034 }
9035 }
9036 last = in;
9037 /*
9038 * skip the trailing blanks
9039 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00009040 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009041 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009042 ((*in == 0x20) || (*in == 0x9) ||
9043 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009044 if (*in == 0xA) {
9045 line++, col = 1;
9046 } else {
9047 col++;
9048 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009049 in++;
9050 if (in >= end) {
9051 const xmlChar *oldbase = ctxt->input->base;
9052 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009053 if (ctxt->instate == XML_PARSER_EOF)
9054 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009055 if (oldbase != ctxt->input->base) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009056 ptrdiff_t delta = ctxt->input->base - oldbase;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009057 start = start + delta;
9058 in = in + delta;
9059 last = last + delta;
9060 }
9061 end = ctxt->input->end;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02009062 if ((in - start) > maxLength) {
Daniel Veillarde17db992012-07-19 11:25:16 +08009063 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009064 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009065 return(NULL);
9066 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009067 }
9068 }
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02009069 if ((in - start) > maxLength) {
Daniel Veillarde17db992012-07-19 11:25:16 +08009070 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009071 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009072 return(NULL);
9073 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009074 if (*in != limit) goto need_complex;
9075 } else {
9076 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9077 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9078 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009079 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009080 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009081 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02009082 if ((in - start) > maxLength) {
Daniel Veillarde17db992012-07-19 11:25:16 +08009083 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009084 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009085 return(NULL);
9086 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009087 }
9088 }
9089 last = in;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02009090 if ((in - start) > maxLength) {
Daniel Veillarde17db992012-07-19 11:25:16 +08009091 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009092 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009093 return(NULL);
9094 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009095 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009096 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009097 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009098 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009099 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009100 *len = last - start;
9101 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009102 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009103 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009104 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009105 }
9106 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009107 ctxt->input->line = line;
9108 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009109 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009110 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009111need_complex:
9112 if (alloc) *alloc = 1;
9113 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009114}
9115
9116/**
9117 * xmlParseAttribute2:
9118 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009119 * @pref: the element prefix
9120 * @elem: the element name
9121 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009122 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009123 * @len: an int * to save the length of the attribute
9124 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009125 *
9126 * parse an attribute in the new SAX2 framework.
9127 *
9128 * Returns the attribute name, and the value in *value, .
9129 */
9130
9131static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009132xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009133 const xmlChar * pref, const xmlChar * elem,
9134 const xmlChar ** prefix, xmlChar ** value,
9135 int *len, int *alloc)
9136{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009137 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009138 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009139 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009140
9141 *value = NULL;
9142 GROW;
9143 name = xmlParseQName(ctxt, prefix);
9144 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009145 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9146 "error parsing attribute name\n");
9147 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009148 }
9149
9150 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009151 * get the type if needed
9152 */
9153 if (ctxt->attsSpecial != NULL) {
9154 int type;
9155
Nick Wellnhoferd422b952017-10-09 13:37:42 +02009156 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9157 pref, elem, *prefix, name);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009158 if (type != 0)
9159 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009160 }
9161
9162 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009163 * read the value
9164 */
9165 SKIP_BLANKS;
9166 if (RAW == '=') {
9167 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009168 SKIP_BLANKS;
9169 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9170 if (normalize) {
9171 /*
9172 * Sometimes a second normalisation pass for spaces is needed
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009173 * but that only happens if charrefs or entities references
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009174 * have been used in the attribute value, i.e. the attribute
9175 * value have been extracted in an allocated string already.
9176 */
9177 if (*alloc) {
9178 const xmlChar *val2;
9179
9180 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009181 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009182 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009183 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009184 }
9185 }
9186 }
9187 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009188 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009189 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02009190 "Specification mandates value for attribute %s\n",
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009191 name);
9192 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009193 }
9194
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009195 if (*prefix == ctxt->str_xml) {
9196 /*
9197 * Check that xml:lang conforms to the specification
9198 * No more registered as an error, just generate a warning now
9199 * since this was deprecated in XML second edition
9200 */
9201 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9202 internal_val = xmlStrndup(val, *len);
9203 if (!xmlCheckLanguageID(internal_val)) {
9204 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9205 "Malformed value for xml:lang : %s\n",
9206 internal_val, NULL);
9207 }
9208 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009209
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009210 /*
9211 * Check that xml:space conforms to the specification
9212 */
9213 if (xmlStrEqual(name, BAD_CAST "space")) {
9214 internal_val = xmlStrndup(val, *len);
9215 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9216 *(ctxt->space) = 0;
9217 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9218 *(ctxt->space) = 1;
9219 else {
9220 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9221 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9222 internal_val, NULL);
9223 }
9224 }
9225 if (internal_val) {
9226 xmlFree(internal_val);
9227 }
9228 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009229
9230 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009231 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009232}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009233/**
9234 * xmlParseStartTag2:
9235 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009236 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009237 * parse a start of tag either for rule element or
9238 * EmptyElement. In both case we don't parse the tag closing chars.
9239 * This routine is called when running SAX2 parsing
9240 *
9241 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9242 *
9243 * [ WFC: Unique Att Spec ]
9244 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009245 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009246 *
9247 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9248 *
9249 * [ WFC: Unique Att Spec ]
9250 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009251 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009252 *
9253 * With namespace:
9254 *
9255 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9256 *
9257 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9258 *
9259 * Returns the element name parsed
9260 */
9261
9262static const xmlChar *
9263xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009264 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009265 const xmlChar *localname;
9266 const xmlChar *prefix;
9267 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009268 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009269 const xmlChar *nsname;
9270 xmlChar *attvalue;
9271 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009272 int maxatts = ctxt->maxatts;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009273 int nratts, nbatts, nbdef, inputid;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009274 int i, j, nbNs, attval;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009275 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009276 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009277
9278 if (RAW != '<') return(NULL);
9279 NEXT1;
9280
9281 /*
9282 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9283 * point since the attribute values may be stored as pointers to
9284 * the buffer and calling SHRINK would destroy them !
9285 * The Shrinking is only possible once the full set of attribute
9286 * callbacks have been done.
9287 */
9288 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009289 cur = ctxt->input->cur - ctxt->input->base;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009290 inputid = ctxt->input->id;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009291 nbatts = 0;
9292 nratts = 0;
9293 nbdef = 0;
9294 nbNs = 0;
9295 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009296 /* Forget any namespaces added during an earlier parse of this element. */
9297 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009298
9299 localname = xmlParseQName(ctxt, &prefix);
9300 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009301 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9302 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009303 return(NULL);
9304 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009305 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009306
9307 /*
9308 * Now parse the attributes, it ends up with the ending
9309 *
9310 * (S Attribute)* S?
9311 */
9312 SKIP_BLANKS;
9313 GROW;
9314
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009315 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009316 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009317 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009318 const xmlChar *q = CUR_PTR;
9319 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009320 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009321
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009322 attname = xmlParseAttribute2(ctxt, prefix, localname,
9323 &aprefix, &attvalue, &len, &alloc);
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009324 if ((attname == NULL) || (attvalue == NULL))
9325 goto next_attr;
9326 if (len < 0) len = xmlStrlen(attvalue);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009327
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009328 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9329 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9330 xmlURIPtr uri;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009331
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009332 if (URL == NULL) {
9333 xmlErrMemory(ctxt, "dictionary allocation failure");
9334 if ((attvalue != NULL) && (alloc != 0))
9335 xmlFree(attvalue);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009336 localname = NULL;
9337 goto done;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009338 }
9339 if (*URL != 0) {
9340 uri = xmlParseURI((const char *) URL);
9341 if (uri == NULL) {
9342 xmlNsErr(ctxt, XML_WAR_NS_URI,
9343 "xmlns: '%s' is not a valid URI\n",
9344 URL, NULL, NULL);
9345 } else {
9346 if (uri->scheme == NULL) {
9347 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9348 "xmlns: URI %s is not absolute\n",
9349 URL, NULL, NULL);
9350 }
9351 xmlFreeURI(uri);
9352 }
Daniel Veillard37334572008-07-31 08:20:02 +00009353 if (URL == ctxt->str_xml_ns) {
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009354 if (attname != ctxt->str_xml) {
9355 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9356 "xml namespace URI cannot be the default namespace\n",
9357 NULL, NULL, NULL);
9358 }
9359 goto next_attr;
9360 }
9361 if ((len == 29) &&
9362 (xmlStrEqual(URL,
9363 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9364 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9365 "reuse of the xmlns namespace name is forbidden\n",
9366 NULL, NULL, NULL);
9367 goto next_attr;
9368 }
9369 }
9370 /*
9371 * check that it's not a defined namespace
9372 */
9373 for (j = 1;j <= nbNs;j++)
9374 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9375 break;
9376 if (j <= nbNs)
9377 xmlErrAttributeDup(ctxt, NULL, attname);
9378 else
9379 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009380
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009381 } else if (aprefix == ctxt->str_xmlns) {
9382 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9383 xmlURIPtr uri;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009384
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009385 if (attname == ctxt->str_xml) {
9386 if (URL != ctxt->str_xml_ns) {
9387 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9388 "xml namespace prefix mapped to wrong URI\n",
9389 NULL, NULL, NULL);
9390 }
9391 /*
9392 * Do not keep a namespace definition node
9393 */
9394 goto next_attr;
9395 }
9396 if (URL == ctxt->str_xml_ns) {
9397 if (attname != ctxt->str_xml) {
9398 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9399 "xml namespace URI mapped to wrong prefix\n",
9400 NULL, NULL, NULL);
9401 }
9402 goto next_attr;
9403 }
9404 if (attname == ctxt->str_xmlns) {
9405 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9406 "redefinition of the xmlns prefix is forbidden\n",
9407 NULL, NULL, NULL);
9408 goto next_attr;
9409 }
9410 if ((len == 29) &&
9411 (xmlStrEqual(URL,
9412 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9413 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9414 "reuse of the xmlns namespace name is forbidden\n",
9415 NULL, NULL, NULL);
9416 goto next_attr;
9417 }
9418 if ((URL == NULL) || (URL[0] == 0)) {
9419 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9420 "xmlns:%s: Empty XML namespace is not allowed\n",
9421 attname, NULL, NULL);
9422 goto next_attr;
9423 } else {
9424 uri = xmlParseURI((const char *) URL);
9425 if (uri == NULL) {
9426 xmlNsErr(ctxt, XML_WAR_NS_URI,
9427 "xmlns:%s: '%s' is not a valid URI\n",
9428 attname, URL, NULL);
9429 } else {
9430 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9431 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9432 "xmlns:%s: URI %s is not absolute\n",
9433 attname, URL, NULL);
9434 }
9435 xmlFreeURI(uri);
9436 }
9437 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009438
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009439 /*
9440 * check that it's not a defined namespace
9441 */
9442 for (j = 1;j <= nbNs;j++)
9443 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9444 break;
9445 if (j <= nbNs)
9446 xmlErrAttributeDup(ctxt, aprefix, attname);
9447 else
9448 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9449
9450 } else {
9451 /*
9452 * Add the pair to atts
9453 */
9454 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9455 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9456 goto next_attr;
9457 }
9458 maxatts = ctxt->maxatts;
9459 atts = ctxt->atts;
9460 }
9461 ctxt->attallocs[nratts++] = alloc;
9462 atts[nbatts++] = attname;
9463 atts[nbatts++] = aprefix;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009464 /*
9465 * The namespace URI field is used temporarily to point at the
9466 * base of the current input buffer for non-alloced attributes.
9467 * When the input buffer is reallocated, all the pointers become
9468 * invalid, but they can be reconstructed later.
9469 */
9470 if (alloc)
9471 atts[nbatts++] = NULL;
9472 else
9473 atts[nbatts++] = ctxt->input->base;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009474 atts[nbatts++] = attvalue;
9475 attvalue += len;
9476 atts[nbatts++] = attvalue;
9477 /*
9478 * tag if some deallocation is needed
9479 */
9480 if (alloc != 0) attval = 1;
9481 attvalue = NULL; /* moved into atts */
9482 }
9483
9484next_attr:
9485 if ((attvalue != NULL) && (alloc != 0)) {
9486 xmlFree(attvalue);
9487 attvalue = NULL;
9488 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009489
9490 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009491 if (ctxt->instate == XML_PARSER_EOF)
9492 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009493 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9494 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02009495 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009496 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9497 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009498 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009499 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009500 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9501 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009502 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009503 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009504 break;
9505 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009506 GROW;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009507 }
9508
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009509 if (ctxt->input->id != inputid) {
9510 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9511 "Unexpected change of input\n");
9512 localname = NULL;
9513 goto done;
9514 }
9515
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009516 /* Reconstruct attribute value pointers. */
9517 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9518 if (atts[i+2] != NULL) {
9519 /*
9520 * Arithmetic on dangling pointers is technically undefined
9521 * behavior, but well...
9522 */
9523 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9524 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9525 atts[i+3] += offset; /* value */
9526 atts[i+4] += offset; /* valuend */
9527 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009528 }
9529
Daniel Veillard0fb18932003-09-07 09:14:37 +00009530 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009531 * The attributes defaulting
9532 */
9533 if (ctxt->attsDefault != NULL) {
9534 xmlDefAttrsPtr defaults;
9535
9536 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9537 if (defaults != NULL) {
9538 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009539 attname = defaults->values[5 * i];
9540 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009541
9542 /*
9543 * special work for namespaces defaulted defs
9544 */
9545 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9546 /*
9547 * check that it's not a defined namespace
9548 */
9549 for (j = 1;j <= nbNs;j++)
9550 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9551 break;
9552 if (j <= nbNs) continue;
9553
9554 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009555 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009556 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009557 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009558 nbNs++;
9559 }
9560 } else if (aprefix == ctxt->str_xmlns) {
9561 /*
9562 * check that it's not a defined namespace
9563 */
9564 for (j = 1;j <= nbNs;j++)
9565 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9566 break;
9567 if (j <= nbNs) continue;
9568
9569 nsname = xmlGetNamespace(ctxt, attname);
9570 if (nsname != defaults->values[2]) {
9571 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009572 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009573 nbNs++;
9574 }
9575 } else {
9576 /*
9577 * check that it's not a defined attribute
9578 */
9579 for (j = 0;j < nbatts;j+=5) {
9580 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9581 break;
9582 }
9583 if (j < nbatts) continue;
9584
9585 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9586 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009587 localname = NULL;
9588 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009589 }
9590 maxatts = ctxt->maxatts;
9591 atts = ctxt->atts;
9592 }
9593 atts[nbatts++] = attname;
9594 atts[nbatts++] = aprefix;
9595 if (aprefix == NULL)
9596 atts[nbatts++] = NULL;
9597 else
9598 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009599 atts[nbatts++] = defaults->values[5 * i + 2];
9600 atts[nbatts++] = defaults->values[5 * i + 3];
9601 if ((ctxt->standalone == 1) &&
9602 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009603 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009604 "standalone: attribute %s on %s defaulted from external subset\n",
9605 attname, localname);
9606 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009607 nbdef++;
9608 }
9609 }
9610 }
9611 }
9612
Daniel Veillarde70c8772003-11-25 07:21:18 +00009613 /*
9614 * The attributes checkings
9615 */
9616 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009617 /*
9618 * The default namespace does not apply to attribute names.
9619 */
9620 if (atts[i + 1] != NULL) {
9621 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9622 if (nsname == NULL) {
9623 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9624 "Namespace prefix %s for %s on %s is not defined\n",
9625 atts[i + 1], atts[i], localname);
9626 }
9627 atts[i + 2] = nsname;
9628 } else
9629 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009630 /*
9631 * [ WFC: Unique Att Spec ]
9632 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009633 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009634 * As extended by the Namespace in XML REC.
9635 */
9636 for (j = 0; j < i;j += 5) {
9637 if (atts[i] == atts[j]) {
9638 if (atts[i+1] == atts[j+1]) {
9639 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9640 break;
9641 }
9642 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9643 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9644 "Namespaced Attribute %s in '%s' redefined\n",
9645 atts[i], nsname, NULL);
9646 break;
9647 }
9648 }
9649 }
9650 }
9651
Daniel Veillarde57ec792003-09-10 10:50:59 +00009652 nsname = xmlGetNamespace(ctxt, prefix);
9653 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009654 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9655 "Namespace prefix %s on %s is not defined\n",
9656 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009657 }
9658 *pref = prefix;
9659 *URI = nsname;
9660
9661 /*
9662 * SAX: Start of Element !
9663 */
9664 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9665 (!ctxt->disableSAX)) {
9666 if (nbNs > 0)
9667 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9668 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9669 nbatts / 5, nbdef, atts);
9670 else
9671 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9672 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9673 }
9674
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009675done:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009676 /*
9677 * Free up attribute allocated strings if needed
9678 */
9679 if (attval != 0) {
9680 for (i = 3,j = 0; j < nratts;i += 5,j++)
9681 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9682 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009683 }
9684
9685 return(localname);
9686}
9687
9688/**
9689 * xmlParseEndTag2:
9690 * @ctxt: an XML parser context
9691 * @line: line of the start tag
9692 * @nsNr: number of namespaces on the start tag
9693 *
9694 * parse an end of tag
9695 *
9696 * [42] ETag ::= '</' Name S? '>'
9697 *
9698 * With namespace
9699 *
9700 * [NS 9] ETag ::= '</' QName S? '>'
9701 */
9702
9703static void
Elliott Hughese54f00d2021-05-13 08:13:46 -07009704xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009705 const xmlChar *name;
9706
9707 GROW;
9708 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009709 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009710 return;
9711 }
9712 SKIP(2);
9713
Elliott Hughese54f00d2021-05-13 08:13:46 -07009714 if (tag->prefix == NULL)
9715 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9716 else
9717 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009718
9719 /*
9720 * We should definitely be at the ending "S? '>'" part
9721 */
9722 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009723 if (ctxt->instate == XML_PARSER_EOF)
9724 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009725 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009726 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009727 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009728 } else
9729 NEXT1;
9730
9731 /*
9732 * [ WFC: Element Type Match ]
9733 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009734 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009735 *
9736 */
9737 if (name != (xmlChar*)1) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009738 if (name == NULL) name = BAD_CAST "unparsable";
Daniel Veillardf403d292003-10-05 13:51:35 +00009739 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009740 "Opening and ending tag mismatch: %s line %d and %s\n",
Elliott Hughese54f00d2021-05-13 08:13:46 -07009741 ctxt->name, tag->line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009742 }
9743
9744 /*
9745 * SAX: End of Tag
9746 */
9747 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9748 (!ctxt->disableSAX))
Elliott Hughese54f00d2021-05-13 08:13:46 -07009749 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9750 tag->URI);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009751
Daniel Veillard0fb18932003-09-07 09:14:37 +00009752 spacePop(ctxt);
Elliott Hughese54f00d2021-05-13 08:13:46 -07009753 if (tag->nsNr != 0)
9754 nsPop(ctxt, tag->nsNr);
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009755}
9756
9757/**
Owen Taylor3473f882001-02-23 17:55:21 +00009758 * xmlParseCDSect:
9759 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009760 *
Owen Taylor3473f882001-02-23 17:55:21 +00009761 * Parse escaped pure raw content.
9762 *
9763 * [18] CDSect ::= CDStart CData CDEnd
9764 *
9765 * [19] CDStart ::= '<![CDATA['
9766 *
9767 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9768 *
9769 * [21] CDEnd ::= ']]>'
9770 */
9771void
9772xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9773 xmlChar *buf = NULL;
9774 int len = 0;
9775 int size = XML_PARSER_BUFFER_SIZE;
9776 int r, rl;
9777 int s, sl;
9778 int cur, l;
9779 int count = 0;
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02009780 int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9781 XML_MAX_HUGE_LENGTH :
9782 XML_MAX_TEXT_LENGTH;
Owen Taylor3473f882001-02-23 17:55:21 +00009783
Daniel Veillard8f597c32003-10-06 08:19:27 +00009784 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009785 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009786 SKIP(9);
9787 } else
9788 return;
9789
9790 ctxt->instate = XML_PARSER_CDATA_SECTION;
9791 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009792 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009793 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009794 ctxt->instate = XML_PARSER_CONTENT;
9795 return;
9796 }
9797 NEXTL(rl);
9798 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009799 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009800 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009801 ctxt->instate = XML_PARSER_CONTENT;
9802 return;
9803 }
9804 NEXTL(sl);
9805 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009806 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009807 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009808 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009809 return;
9810 }
William M. Brack871611b2003-10-18 04:53:14 +00009811 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009812 ((r != ']') || (s != ']') || (cur != '>'))) {
9813 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009814 xmlChar *tmp;
9815
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009816 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009817 if (tmp == NULL) {
9818 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009819 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009820 return;
9821 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009822 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009823 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009824 }
9825 COPY_BUF(rl,buf,len,r);
9826 r = s;
9827 rl = sl;
9828 s = cur;
9829 sl = l;
9830 count++;
9831 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08009832 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00009833 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009834 if (ctxt->instate == XML_PARSER_EOF) {
9835 xmlFree(buf);
9836 return;
9837 }
Owen Taylor3473f882001-02-23 17:55:21 +00009838 count = 0;
9839 }
9840 NEXTL(l);
9841 cur = CUR_CHAR(l);
Nick Wellnhoferb83e2ff2022-08-25 17:43:08 +02009842 if (len > maxLength) {
9843 xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9844 "CData section too big found\n");
9845 xmlFree(buf);
9846 return;
9847 }
Owen Taylor3473f882001-02-23 17:55:21 +00009848 }
9849 buf[len] = 0;
9850 ctxt->instate = XML_PARSER_CONTENT;
9851 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009852 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009853 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009854 xmlFree(buf);
9855 return;
9856 }
9857 NEXTL(l);
9858
9859 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009860 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009861 */
9862 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9863 if (ctxt->sax->cdataBlock != NULL)
9864 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009865 else if (ctxt->sax->characters != NULL)
9866 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009867 }
9868 xmlFree(buf);
9869}
9870
9871/**
Elliott Hughese54f00d2021-05-13 08:13:46 -07009872 * xmlParseContentInternal:
Owen Taylor3473f882001-02-23 17:55:21 +00009873 * @ctxt: an XML parser context
9874 *
Elliott Hughese54f00d2021-05-13 08:13:46 -07009875 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9876 * unexpected EOF to the caller.
Owen Taylor3473f882001-02-23 17:55:21 +00009877 */
9878
Elliott Hughese54f00d2021-05-13 08:13:46 -07009879static void
9880xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009881 int nameNr = ctxt->nameNr;
9882
Owen Taylor3473f882001-02-23 17:55:21 +00009883 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009884 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009885 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009886 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009887 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009888 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009889
9890 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009891 * First case : a Processing Instruction.
9892 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009893 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009894 xmlParsePI(ctxt);
9895 }
9896
9897 /*
9898 * Second case : a CDSection
9899 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009900 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009901 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009902 xmlParseCDSect(ctxt);
9903 }
9904
9905 /*
9906 * Third case : a comment
9907 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009908 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009909 (NXT(2) == '-') && (NXT(3) == '-')) {
9910 xmlParseComment(ctxt);
9911 ctxt->instate = XML_PARSER_CONTENT;
9912 }
9913
9914 /*
9915 * Fourth case : a sub-element.
9916 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009917 else if (*cur == '<') {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009918 if (NXT(1) == '/') {
9919 if (ctxt->nameNr <= nameNr)
9920 break;
9921 xmlParseElementEnd(ctxt);
9922 } else {
9923 xmlParseElementStart(ctxt);
9924 }
Owen Taylor3473f882001-02-23 17:55:21 +00009925 }
9926
9927 /*
9928 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009929 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009930 */
9931
Daniel Veillard21a0f912001-02-25 19:54:14 +00009932 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009933 xmlParseReference(ctxt);
9934 }
9935
9936 /*
9937 * Last case, text. Note that References are handled directly.
9938 */
9939 else {
9940 xmlParseCharData(ctxt, 0);
9941 }
9942
9943 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00009944 SHRINK;
9945
Daniel Veillardfdc91562002-07-01 21:52:03 +00009946 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009947 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9948 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +08009949 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009950 break;
9951 }
9952 }
9953}
9954
9955/**
Elliott Hughese54f00d2021-05-13 08:13:46 -07009956 * xmlParseContent:
9957 * @ctxt: an XML parser context
9958 *
9959 * Parse a content sequence. Stops at EOF or '</'.
9960 *
9961 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9962 */
9963
9964void
9965xmlParseContent(xmlParserCtxtPtr ctxt) {
9966 int nameNr = ctxt->nameNr;
9967
9968 xmlParseContentInternal(ctxt);
9969
9970 if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9971 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9972 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9973 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9974 "Premature end of data in tag %s line %d\n",
9975 name, line, NULL);
9976 }
9977}
9978
9979/**
Owen Taylor3473f882001-02-23 17:55:21 +00009980 * xmlParseElement:
9981 * @ctxt: an XML parser context
9982 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009983 * parse an XML element
Owen Taylor3473f882001-02-23 17:55:21 +00009984 *
9985 * [39] element ::= EmptyElemTag | STag content ETag
9986 *
9987 * [ WFC: Element Type Match ]
9988 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009989 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009990 *
Owen Taylor3473f882001-02-23 17:55:21 +00009991 */
9992
9993void
9994xmlParseElement(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009995 if (xmlParseElementStart(ctxt) != 0)
9996 return;
Elliott Hughese54f00d2021-05-13 08:13:46 -07009997
9998 xmlParseContentInternal(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009999 if (ctxt->instate == XML_PARSER_EOF)
10000 return;
Elliott Hughese54f00d2021-05-13 08:13:46 -070010001
10002 if (CUR == 0) {
10003 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10004 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10005 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10006 "Premature end of data in tag %s line %d\n",
10007 name, line, NULL);
10008 return;
10009 }
10010
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010011 xmlParseElementEnd(ctxt);
10012}
10013
10014/**
10015 * xmlParseElementStart:
10016 * @ctxt: an XML parser context
10017 *
10018 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10019 * opening tag was parsed, 1 if an empty element was parsed.
10020 */
10021static int
10022xmlParseElementStart(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +000010023 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010024 const xmlChar *prefix = NULL;
10025 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010026 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +080010027 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010028 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +000010029 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +000010030
Daniel Veillard8915c152008-08-26 13:05:34 +000010031 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10032 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10033 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10034 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10035 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +080010036 xmlHaltParser(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010037 return(-1);
Daniel Veillard4a9fe382006-09-19 12:44:35 +000010038 }
10039
Owen Taylor3473f882001-02-23 17:55:21 +000010040 /* Capture start position */
10041 if (ctxt->record_info) {
10042 node_info.begin_pos = ctxt->input->consumed +
10043 (CUR_PTR - ctxt->input->base);
10044 node_info.begin_line = ctxt->input->line;
10045 }
10046
10047 if (ctxt->spaceNr == 0)
10048 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010049 else if (*ctxt->space == -2)
10050 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +000010051 else
10052 spacePush(ctxt, *ctxt->space);
10053
Daniel Veillard6c5b2d32003-03-27 14:55:52 +000010054 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +000010055#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010056 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010057#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010058 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010059#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010060 else
10061 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010062#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080010063 if (ctxt->instate == XML_PARSER_EOF)
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010064 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010065 if (name == NULL) {
10066 spacePop(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010067 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010068 }
Elliott Hughese54f00d2021-05-13 08:13:46 -070010069 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010070 ret = ctxt->node;
10071
Daniel Veillard4432df22003-09-28 18:58:27 +000010072#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010073 /*
10074 * [ VC: Root Element Type ]
10075 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010076 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +000010077 */
10078 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10079 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10080 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010081#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010082
10083 /*
10084 * Check for an Empty Element.
10085 */
10086 if ((RAW == '/') && (NXT(1) == '>')) {
10087 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010088 if (ctxt->sax2) {
10089 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10090 (!ctxt->disableSAX))
10091 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010092#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010093 } else {
10094 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10095 (!ctxt->disableSAX))
10096 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010097#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010098 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010099 namePop(ctxt);
10100 spacePop(ctxt);
10101 if (nsNr != ctxt->nsNr)
10102 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010103 if ( ret != NULL && ctxt->record_info ) {
10104 node_info.end_pos = ctxt->input->consumed +
10105 (CUR_PTR - ctxt->input->base);
10106 node_info.end_line = ctxt->input->line;
10107 node_info.node = ret;
10108 xmlParserAddNodeInfo(ctxt, &node_info);
10109 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010110 return(1);
Owen Taylor3473f882001-02-23 17:55:21 +000010111 }
10112 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010113 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010114 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010115 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10116 "Couldn't find end of Start Tag %s line %d\n",
10117 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010118
10119 /*
10120 * end of parsing of this node.
10121 */
10122 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010123 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010124 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010125 if (nsNr != ctxt->nsNr)
10126 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010127
10128 /*
10129 * Capture end position and add node
10130 */
10131 if ( ret != NULL && ctxt->record_info ) {
10132 node_info.end_pos = ctxt->input->consumed +
10133 (CUR_PTR - ctxt->input->base);
10134 node_info.end_line = ctxt->input->line;
10135 node_info.node = ret;
10136 xmlParserAddNodeInfo(ctxt, &node_info);
10137 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010138 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010139 }
10140
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010141 return(0);
10142}
Owen Taylor3473f882001-02-23 17:55:21 +000010143
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010144/**
10145 * xmlParseElementEnd:
10146 * @ctxt: an XML parser context
10147 *
10148 * Parse the end of an XML element.
10149 */
10150static void
10151xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10152 xmlParserNodeInfo node_info;
10153 xmlNodePtr ret = ctxt->node;
10154
10155 if (ctxt->nameNr <= 0)
10156 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010157
10158 /*
10159 * parse the end of tag: '</' should be here.
10160 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010161 if (ctxt->sax2) {
Elliott Hughese54f00d2021-05-13 08:13:46 -070010162 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010163 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010164 }
10165#ifdef LIBXML_SAX1_ENABLED
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010166 else
10167 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010168#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010169
10170 /*
10171 * Capture end position and add node
10172 */
10173 if ( ret != NULL && ctxt->record_info ) {
10174 node_info.end_pos = ctxt->input->consumed +
10175 (CUR_PTR - ctxt->input->base);
10176 node_info.end_line = ctxt->input->line;
10177 node_info.node = ret;
10178 xmlParserAddNodeInfo(ctxt, &node_info);
10179 }
10180}
10181
10182/**
10183 * xmlParseVersionNum:
10184 * @ctxt: an XML parser context
10185 *
10186 * parse the XML version value.
10187 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010188 * [26] VersionNum ::= '1.' [0-9]+
10189 *
10190 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010191 *
10192 * Returns the string giving the XML version number, or NULL
10193 */
10194xmlChar *
10195xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10196 xmlChar *buf = NULL;
10197 int len = 0;
10198 int size = 10;
10199 xmlChar cur;
10200
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010201 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010202 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010203 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010204 return(NULL);
10205 }
10206 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010207 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010208 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010209 return(NULL);
10210 }
10211 buf[len++] = cur;
10212 NEXT;
10213 cur=CUR;
10214 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010215 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010216 return(NULL);
10217 }
10218 buf[len++] = cur;
10219 NEXT;
10220 cur=CUR;
10221 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010222 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010223 xmlChar *tmp;
10224
Owen Taylor3473f882001-02-23 17:55:21 +000010225 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010226 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10227 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010228 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010229 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010230 return(NULL);
10231 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010232 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010233 }
10234 buf[len++] = cur;
10235 NEXT;
10236 cur=CUR;
10237 }
10238 buf[len] = 0;
10239 return(buf);
10240}
10241
10242/**
10243 * xmlParseVersionInfo:
10244 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010245 *
Owen Taylor3473f882001-02-23 17:55:21 +000010246 * parse the XML version.
10247 *
10248 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010249 *
Owen Taylor3473f882001-02-23 17:55:21 +000010250 * [25] Eq ::= S? '=' S?
10251 *
10252 * Returns the version string, e.g. "1.0"
10253 */
10254
10255xmlChar *
10256xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10257 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010258
Daniel Veillarda07050d2003-10-19 14:46:32 +000010259 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010260 SKIP(7);
10261 SKIP_BLANKS;
10262 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010263 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010264 return(NULL);
10265 }
10266 NEXT;
10267 SKIP_BLANKS;
10268 if (RAW == '"') {
10269 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010270 version = xmlParseVersionNum(ctxt);
10271 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010272 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010273 } else
10274 NEXT;
10275 } else if (RAW == '\''){
10276 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010277 version = xmlParseVersionNum(ctxt);
10278 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010279 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010280 } else
10281 NEXT;
10282 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010283 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010284 }
10285 }
10286 return(version);
10287}
10288
10289/**
10290 * xmlParseEncName:
10291 * @ctxt: an XML parser context
10292 *
10293 * parse the XML encoding name
10294 *
10295 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10296 *
10297 * Returns the encoding name value or NULL
10298 */
10299xmlChar *
10300xmlParseEncName(xmlParserCtxtPtr ctxt) {
10301 xmlChar *buf = NULL;
10302 int len = 0;
10303 int size = 10;
10304 xmlChar cur;
10305
10306 cur = CUR;
10307 if (((cur >= 'a') && (cur <= 'z')) ||
10308 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010309 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010310 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010311 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010312 return(NULL);
10313 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010314
Owen Taylor3473f882001-02-23 17:55:21 +000010315 buf[len++] = cur;
10316 NEXT;
10317 cur = CUR;
10318 while (((cur >= 'a') && (cur <= 'z')) ||
10319 ((cur >= 'A') && (cur <= 'Z')) ||
10320 ((cur >= '0') && (cur <= '9')) ||
10321 (cur == '.') || (cur == '_') ||
10322 (cur == '-')) {
10323 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010324 xmlChar *tmp;
10325
Owen Taylor3473f882001-02-23 17:55:21 +000010326 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010327 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10328 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010329 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010330 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010331 return(NULL);
10332 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010333 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010334 }
10335 buf[len++] = cur;
10336 NEXT;
10337 cur = CUR;
10338 if (cur == 0) {
10339 SHRINK;
10340 GROW;
10341 cur = CUR;
10342 }
10343 }
10344 buf[len] = 0;
10345 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010346 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010347 }
10348 return(buf);
10349}
10350
10351/**
10352 * xmlParseEncodingDecl:
10353 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010354 *
Owen Taylor3473f882001-02-23 17:55:21 +000010355 * parse the XML encoding declaration
10356 *
10357 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10358 *
10359 * this setups the conversion filters.
10360 *
10361 * Returns the encoding value or NULL
10362 */
10363
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010364const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010365xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10366 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010367
10368 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010369 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010370 SKIP(8);
10371 SKIP_BLANKS;
10372 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010373 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010374 return(NULL);
10375 }
10376 NEXT;
10377 SKIP_BLANKS;
10378 if (RAW == '"') {
10379 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010380 encoding = xmlParseEncName(ctxt);
10381 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010382 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010383 xmlFree((xmlChar *) encoding);
10384 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010385 } else
10386 NEXT;
10387 } else if (RAW == '\''){
10388 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010389 encoding = xmlParseEncName(ctxt);
10390 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010391 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010392 xmlFree((xmlChar *) encoding);
10393 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010394 } else
10395 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010396 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010397 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010398 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010399
10400 /*
10401 * Non standard parsing, allowing the user to ignore encoding
10402 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010403 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10404 xmlFree((xmlChar *) encoding);
10405 return(NULL);
10406 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010407
Daniel Veillard6b621b82003-08-11 15:03:34 +000010408 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010409 * UTF-16 encoding switch has already taken place at this stage,
Daniel Veillard6b621b82003-08-11 15:03:34 +000010410 * more over the little-endian/big-endian selection is already done
10411 */
10412 if ((encoding != NULL) &&
10413 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10414 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010415 /*
10416 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010417 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010418 * document is apparently UTF-8 compatible, then raise an
10419 * encoding mismatch fatal error
10420 */
10421 if ((ctxt->encoding == NULL) &&
10422 (ctxt->input->buf != NULL) &&
10423 (ctxt->input->buf->encoder == NULL)) {
10424 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10425 "Document labelled UTF-16 but has UTF-8 content\n");
10426 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010427 if (ctxt->encoding != NULL)
10428 xmlFree((xmlChar *) ctxt->encoding);
10429 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010430 }
10431 /*
10432 * UTF-8 encoding is handled natively
10433 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010434 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010435 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10436 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010437 if (ctxt->encoding != NULL)
10438 xmlFree((xmlChar *) ctxt->encoding);
10439 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010440 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010441 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010442 xmlCharEncodingHandlerPtr handler;
10443
10444 if (ctxt->input->encoding != NULL)
10445 xmlFree((xmlChar *) ctxt->input->encoding);
10446 ctxt->input->encoding = encoding;
10447
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010448 handler = xmlFindCharEncodingHandler((const char *) encoding);
10449 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010450 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10451 /* failed to convert */
10452 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10453 return(NULL);
10454 }
Owen Taylor3473f882001-02-23 17:55:21 +000010455 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010456 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010457 "Unsupported encoding %s\n", encoding);
10458 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010459 }
10460 }
10461 }
10462 return(encoding);
10463}
10464
10465/**
10466 * xmlParseSDDecl:
10467 * @ctxt: an XML parser context
10468 *
10469 * parse the XML standalone declaration
10470 *
10471 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010472 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010473 *
10474 * [ VC: Standalone Document Declaration ]
10475 * TODO The standalone document declaration must have the value "no"
10476 * if any external markup declarations contain declarations of:
10477 * - attributes with default values, if elements to which these
10478 * attributes apply appear in the document without specifications
10479 * of values for these attributes, or
10480 * - entities (other than amp, lt, gt, apos, quot), if references
10481 * to those entities appear in the document, or
10482 * - attributes with values subject to normalization, where the
10483 * attribute appears in the document with a value which will change
10484 * as a result of normalization, or
10485 * - element types with element content, if white space occurs directly
10486 * within any instance of those types.
10487 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010488 * Returns:
10489 * 1 if standalone="yes"
10490 * 0 if standalone="no"
10491 * -2 if standalone attribute is missing or invalid
10492 * (A standalone value of -2 means that the XML declaration was found,
10493 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010494 */
10495
10496int
10497xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010498 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010499
10500 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010501 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010502 SKIP(10);
10503 SKIP_BLANKS;
10504 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010505 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010506 return(standalone);
10507 }
10508 NEXT;
10509 SKIP_BLANKS;
10510 if (RAW == '\''){
10511 NEXT;
10512 if ((RAW == 'n') && (NXT(1) == 'o')) {
10513 standalone = 0;
10514 SKIP(2);
10515 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10516 (NXT(2) == 's')) {
10517 standalone = 1;
10518 SKIP(3);
10519 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010520 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010521 }
10522 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010523 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010524 } else
10525 NEXT;
10526 } else if (RAW == '"'){
10527 NEXT;
10528 if ((RAW == 'n') && (NXT(1) == 'o')) {
10529 standalone = 0;
10530 SKIP(2);
10531 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10532 (NXT(2) == 's')) {
10533 standalone = 1;
10534 SKIP(3);
10535 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010536 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010537 }
10538 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010539 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010540 } else
10541 NEXT;
10542 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010543 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010544 }
10545 }
10546 return(standalone);
10547}
10548
10549/**
10550 * xmlParseXMLDecl:
10551 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010552 *
Owen Taylor3473f882001-02-23 17:55:21 +000010553 * parse an XML declaration header
10554 *
10555 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10556 */
10557
10558void
10559xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10560 xmlChar *version;
10561
10562 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010563 * This value for standalone indicates that the document has an
10564 * XML declaration but it does not have a standalone attribute.
10565 * It will be overwritten later if a standalone attribute is found.
10566 */
10567 ctxt->input->standalone = -2;
10568
10569 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010570 * We know that '<?xml' is here.
10571 */
10572 SKIP(5);
10573
William M. Brack76e95df2003-10-18 16:20:14 +000010574 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010575 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10576 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010577 }
10578 SKIP_BLANKS;
10579
10580 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010581 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010582 */
10583 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010584 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010585 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010586 } else {
10587 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10588 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010589 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010590 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010591 if (ctxt->options & XML_PARSE_OLD10) {
10592 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10593 "Unsupported version '%s'\n",
10594 version);
10595 } else {
10596 if ((version[0] == '1') && ((version[1] == '.'))) {
10597 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10598 "Unsupported version '%s'\n",
10599 version, NULL);
10600 } else {
10601 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10602 "Unsupported version '%s'\n",
10603 version);
10604 }
10605 }
Daniel Veillard19840942001-11-29 16:11:38 +000010606 }
10607 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010608 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010609 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010610 }
Owen Taylor3473f882001-02-23 17:55:21 +000010611
10612 /*
10613 * We may have the encoding declaration
10614 */
William M. Brack76e95df2003-10-18 16:20:14 +000010615 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010616 if ((RAW == '?') && (NXT(1) == '>')) {
10617 SKIP(2);
10618 return;
10619 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010620 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010621 }
10622 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010623 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10624 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010625 /*
10626 * The XML REC instructs us to stop parsing right here
10627 */
10628 return;
10629 }
10630
10631 /*
10632 * We may have the standalone status.
10633 */
William M. Brack76e95df2003-10-18 16:20:14 +000010634 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010635 if ((RAW == '?') && (NXT(1) == '>')) {
10636 SKIP(2);
10637 return;
10638 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010639 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010640 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010641
10642 /*
10643 * We can grow the input buffer freely at that point
10644 */
10645 GROW;
10646
Owen Taylor3473f882001-02-23 17:55:21 +000010647 SKIP_BLANKS;
10648 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10649
10650 SKIP_BLANKS;
10651 if ((RAW == '?') && (NXT(1) == '>')) {
10652 SKIP(2);
10653 } else if (RAW == '>') {
10654 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010655 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010656 NEXT;
10657 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010658 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010659 MOVETO_ENDTAG(CUR_PTR);
10660 NEXT;
10661 }
10662}
10663
10664/**
10665 * xmlParseMisc:
10666 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010667 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010668 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010669 *
10670 * [27] Misc ::= Comment | PI | S
10671 */
10672
10673void
10674xmlParseMisc(xmlParserCtxtPtr ctxt) {
Elliott Hughesecdab2a2022-02-23 14:33:50 -080010675 while (ctxt->instate != XML_PARSER_EOF) {
10676 SKIP_BLANKS;
10677 GROW;
Daniel Veillard561b7f82002-03-20 21:55:57 +000010678 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010679 xmlParsePI(ctxt);
Elliott Hughesecdab2a2022-02-23 14:33:50 -080010680 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010681 xmlParseComment(ctxt);
Elliott Hughesecdab2a2022-02-23 14:33:50 -080010682 } else {
10683 break;
10684 }
Owen Taylor3473f882001-02-23 17:55:21 +000010685 }
10686}
10687
10688/**
10689 * xmlParseDocument:
10690 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010691 *
Owen Taylor3473f882001-02-23 17:55:21 +000010692 * parse an XML document (and build a tree if using the standard SAX
10693 * interface).
10694 *
10695 * [1] document ::= prolog element Misc*
10696 *
10697 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10698 *
10699 * Returns 0, -1 in case of error. the parser context is augmented
10700 * as a result of the parsing.
10701 */
10702
10703int
10704xmlParseDocument(xmlParserCtxtPtr ctxt) {
10705 xmlChar start[4];
10706 xmlCharEncoding enc;
10707
10708 xmlInitParser();
10709
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010710 if ((ctxt == NULL) || (ctxt->input == NULL))
10711 return(-1);
10712
Owen Taylor3473f882001-02-23 17:55:21 +000010713 GROW;
10714
10715 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010716 * SAX: detecting the level.
10717 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010718 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010719
10720 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010721 * SAX: beginning of the document processing.
10722 */
10723 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10724 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010725 if (ctxt->instate == XML_PARSER_EOF)
10726 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010727
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010728 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010729 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010730 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010731 * Get the 4 first bytes and decode the charset
10732 * if enc != XML_CHAR_ENCODING_NONE
10733 * plug some encoding conversion routines.
10734 */
10735 start[0] = RAW;
10736 start[1] = NXT(1);
10737 start[2] = NXT(2);
10738 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010739 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010740 if (enc != XML_CHAR_ENCODING_NONE) {
10741 xmlSwitchEncoding(ctxt, enc);
10742 }
Owen Taylor3473f882001-02-23 17:55:21 +000010743 }
10744
10745
10746 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010747 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010748 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010749 }
10750
10751 /*
10752 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010753 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010754 * than just the first line, unless the amount of data is really
10755 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010756 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010757 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10758 GROW;
10759 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010760 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010761
10762 /*
10763 * Note that we will switch encoding on the fly.
10764 */
10765 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010766 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10767 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010768 /*
10769 * The XML REC instructs us to stop parsing right here
10770 */
10771 return(-1);
10772 }
10773 ctxt->standalone = ctxt->input->standalone;
10774 SKIP_BLANKS;
10775 } else {
10776 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10777 }
10778 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10779 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010780 if (ctxt->instate == XML_PARSER_EOF)
10781 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010782 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10783 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10784 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10785 }
Owen Taylor3473f882001-02-23 17:55:21 +000010786
10787 /*
10788 * The Misc part of the Prolog
10789 */
Owen Taylor3473f882001-02-23 17:55:21 +000010790 xmlParseMisc(ctxt);
10791
10792 /*
10793 * Then possibly doc type declaration(s) and more Misc
10794 * (doctypedecl Misc*)?
10795 */
10796 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010797 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010798
10799 ctxt->inSubset = 1;
10800 xmlParseDocTypeDecl(ctxt);
10801 if (RAW == '[') {
10802 ctxt->instate = XML_PARSER_DTD;
10803 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010804 if (ctxt->instate == XML_PARSER_EOF)
10805 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010806 }
10807
10808 /*
10809 * Create and update the external subset.
10810 */
10811 ctxt->inSubset = 2;
10812 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10813 (!ctxt->disableSAX))
10814 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10815 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010816 if (ctxt->instate == XML_PARSER_EOF)
10817 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010818 ctxt->inSubset = 0;
10819
Daniel Veillardac4118d2008-01-11 05:27:32 +000010820 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010821
10822 ctxt->instate = XML_PARSER_PROLOG;
10823 xmlParseMisc(ctxt);
10824 }
10825
10826 /*
10827 * Time to start parsing the tree itself
10828 */
10829 GROW;
10830 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010831 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10832 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010833 } else {
10834 ctxt->instate = XML_PARSER_CONTENT;
10835 xmlParseElement(ctxt);
10836 ctxt->instate = XML_PARSER_EPILOG;
10837
10838
10839 /*
10840 * The Misc part at the end
10841 */
10842 xmlParseMisc(ctxt);
10843
Daniel Veillard561b7f82002-03-20 21:55:57 +000010844 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010845 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010846 }
10847 ctxt->instate = XML_PARSER_EOF;
10848 }
10849
10850 /*
10851 * SAX: end of the document processing.
10852 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010853 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010854 ctxt->sax->endDocument(ctxt->userData);
10855
Daniel Veillard5997aca2002-03-18 18:36:20 +000010856 /*
10857 * Remove locally kept entity definitions if the tree was not built
10858 */
10859 if ((ctxt->myDoc != NULL) &&
10860 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10861 xmlFreeDoc(ctxt->myDoc);
10862 ctxt->myDoc = NULL;
10863 }
10864
Daniel Veillardae0765b2008-07-31 19:54:59 +000010865 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10866 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10867 if (ctxt->valid)
10868 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10869 if (ctxt->nsWellFormed)
10870 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10871 if (ctxt->options & XML_PARSE_OLD10)
10872 ctxt->myDoc->properties |= XML_DOC_OLD10;
10873 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010874 if (! ctxt->wellFormed) {
10875 ctxt->valid = 0;
10876 return(-1);
10877 }
Owen Taylor3473f882001-02-23 17:55:21 +000010878 return(0);
10879}
10880
10881/**
10882 * xmlParseExtParsedEnt:
10883 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010884 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010885 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010886 * An external general parsed entity is well-formed if it matches the
10887 * production labeled extParsedEnt.
10888 *
10889 * [78] extParsedEnt ::= TextDecl? content
10890 *
10891 * Returns 0, -1 in case of error. the parser context is augmented
10892 * as a result of the parsing.
10893 */
10894
10895int
10896xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10897 xmlChar start[4];
10898 xmlCharEncoding enc;
10899
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010900 if ((ctxt == NULL) || (ctxt->input == NULL))
10901 return(-1);
10902
Owen Taylor3473f882001-02-23 17:55:21 +000010903 xmlDefaultSAXHandlerInit();
10904
Daniel Veillard309f81d2003-09-23 09:02:53 +000010905 xmlDetectSAX2(ctxt);
10906
Owen Taylor3473f882001-02-23 17:55:21 +000010907 GROW;
10908
10909 /*
10910 * SAX: beginning of the document processing.
10911 */
10912 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10913 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10914
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010915 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010916 * Get the 4 first bytes and decode the charset
10917 * if enc != XML_CHAR_ENCODING_NONE
10918 * plug some encoding conversion routines.
10919 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010920 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10921 start[0] = RAW;
10922 start[1] = NXT(1);
10923 start[2] = NXT(2);
10924 start[3] = NXT(3);
10925 enc = xmlDetectCharEncoding(start, 4);
10926 if (enc != XML_CHAR_ENCODING_NONE) {
10927 xmlSwitchEncoding(ctxt, enc);
10928 }
Owen Taylor3473f882001-02-23 17:55:21 +000010929 }
10930
10931
10932 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010933 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010934 }
10935
10936 /*
10937 * Check for the XMLDecl in the Prolog.
10938 */
10939 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010940 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010941
10942 /*
10943 * Note that we will switch encoding on the fly.
10944 */
10945 xmlParseXMLDecl(ctxt);
10946 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10947 /*
10948 * The XML REC instructs us to stop parsing right here
10949 */
10950 return(-1);
10951 }
10952 SKIP_BLANKS;
10953 } else {
10954 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10955 }
10956 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10957 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010958 if (ctxt->instate == XML_PARSER_EOF)
10959 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010960
10961 /*
10962 * Doing validity checking on chunk doesn't make sense
10963 */
10964 ctxt->instate = XML_PARSER_CONTENT;
10965 ctxt->validate = 0;
10966 ctxt->loadsubset = 0;
10967 ctxt->depth = 0;
10968
10969 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010970 if (ctxt->instate == XML_PARSER_EOF)
10971 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010972
Owen Taylor3473f882001-02-23 17:55:21 +000010973 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010974 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010975 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010976 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010977 }
10978
10979 /*
10980 * SAX: end of the document processing.
10981 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010982 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010983 ctxt->sax->endDocument(ctxt->userData);
10984
10985 if (! ctxt->wellFormed) return(-1);
10986 return(0);
10987}
10988
Daniel Veillard73b013f2003-09-30 12:36:01 +000010989#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010990/************************************************************************
10991 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010992 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010993 * *
10994 ************************************************************************/
10995
10996/**
10997 * xmlParseLookupSequence:
10998 * @ctxt: an XML parser context
10999 * @first: the first char to lookup
11000 * @next: the next char to lookup or zero
11001 * @third: the next char to lookup or zero
11002 *
11003 * Try to find if a sequence (first, next, third) or just (first next) or
11004 * (first) is available in the input stream.
11005 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11006 * to avoid rescanning sequences of bytes, it DOES change the state of the
11007 * parser, do not use liberally.
11008 *
11009 * Returns the index to the current parsing point if the full sequence
11010 * is available, -1 otherwise.
11011 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011012static int
Owen Taylor3473f882001-02-23 17:55:21 +000011013xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11014 xmlChar next, xmlChar third) {
11015 int base, len;
11016 xmlParserInputPtr in;
11017 const xmlChar *buf;
11018
11019 in = ctxt->input;
11020 if (in == NULL) return(-1);
11021 base = in->cur - in->base;
11022 if (base < 0) return(-1);
11023 if (ctxt->checkIndex > base)
11024 base = ctxt->checkIndex;
11025 if (in->buf == NULL) {
11026 buf = in->base;
11027 len = in->length;
11028 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011029 buf = xmlBufContent(in->buf->buffer);
11030 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000011031 }
11032 /* take into account the sequence length */
11033 if (third) len -= 2;
11034 else if (next) len --;
11035 for (;base < len;base++) {
11036 if (buf[base] == first) {
11037 if (third != 0) {
11038 if ((buf[base + 1] != next) ||
11039 (buf[base + 2] != third)) continue;
11040 } else if (next != 0) {
11041 if (buf[base + 1] != next) continue;
11042 }
11043 ctxt->checkIndex = 0;
11044#ifdef DEBUG_PUSH
11045 if (next == 0)
11046 xmlGenericError(xmlGenericErrorContext,
11047 "PP: lookup '%c' found at %d\n",
11048 first, base);
11049 else if (third == 0)
11050 xmlGenericError(xmlGenericErrorContext,
11051 "PP: lookup '%c%c' found at %d\n",
11052 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011053 else
Owen Taylor3473f882001-02-23 17:55:21 +000011054 xmlGenericError(xmlGenericErrorContext,
11055 "PP: lookup '%c%c%c' found at %d\n",
11056 first, next, third, base);
11057#endif
11058 return(base - (in->cur - in->base));
11059 }
11060 }
11061 ctxt->checkIndex = base;
11062#ifdef DEBUG_PUSH
11063 if (next == 0)
11064 xmlGenericError(xmlGenericErrorContext,
11065 "PP: lookup '%c' failed\n", first);
11066 else if (third == 0)
11067 xmlGenericError(xmlGenericErrorContext,
11068 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011069 else
Owen Taylor3473f882001-02-23 17:55:21 +000011070 xmlGenericError(xmlGenericErrorContext,
11071 "PP: lookup '%c%c%c' failed\n", first, next, third);
11072#endif
11073 return(-1);
11074}
11075
11076/**
Daniel Veillarda880b122003-04-21 21:36:41 +000011077 * xmlParseGetLasts:
11078 * @ctxt: an XML parser context
11079 * @lastlt: pointer to store the last '<' from the input
11080 * @lastgt: pointer to store the last '>' from the input
11081 *
11082 * Lookup the last < and > in the current chunk
11083 */
11084static void
11085xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11086 const xmlChar **lastgt) {
11087 const xmlChar *tmp;
11088
11089 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11090 xmlGenericError(xmlGenericErrorContext,
11091 "Internal error: xmlParseGetLasts\n");
11092 return;
11093 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011094 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011095 tmp = ctxt->input->end;
11096 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000011097 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000011098 if (tmp < ctxt->input->base) {
11099 *lastlt = NULL;
11100 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011101 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011102 *lastlt = tmp;
11103 tmp++;
11104 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11105 if (*tmp == '\'') {
11106 tmp++;
11107 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11108 if (tmp < ctxt->input->end) tmp++;
11109 } else if (*tmp == '"') {
11110 tmp++;
11111 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11112 if (tmp < ctxt->input->end) tmp++;
11113 } else
11114 tmp++;
11115 }
11116 if (tmp < ctxt->input->end)
11117 *lastgt = tmp;
11118 else {
11119 tmp = *lastlt;
11120 tmp--;
11121 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11122 if (tmp >= ctxt->input->base)
11123 *lastgt = tmp;
11124 else
11125 *lastgt = NULL;
11126 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011127 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011128 } else {
11129 *lastlt = NULL;
11130 *lastgt = NULL;
11131 }
11132}
11133/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011134 * xmlCheckCdataPush:
David Kilzer4f8606c2016-01-05 13:38:09 -080011135 * @cur: pointer to the block of characters
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011136 * @len: length of the block in bytes
David Kilzer4f8606c2016-01-05 13:38:09 -080011137 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011138 *
11139 * Check that the block of characters is okay as SCdata content [20]
11140 *
11141 * Returns the number of bytes to pass if okay, a negative index where an
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +020011142 * UTF-8 error occurred otherwise
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011143 */
11144static int
David Kilzer4f8606c2016-01-05 13:38:09 -080011145xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011146 int ix;
11147 unsigned char c;
11148 int codepoint;
11149
11150 if ((utf == NULL) || (len <= 0))
11151 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011152
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011153 for (ix = 0; ix < len;) { /* string is 0-terminated */
11154 c = utf[ix];
11155 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11156 if (c >= 0x20)
11157 ix++;
11158 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11159 ix++;
11160 else
11161 return(-ix);
11162 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011163 if (ix + 2 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011164 if ((utf[ix+1] & 0xc0 ) != 0x80)
11165 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011166 codepoint = (utf[ix] & 0x1f) << 6;
11167 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011168 if (!xmlIsCharQ(codepoint))
11169 return(-ix);
11170 ix += 2;
11171 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011172 if (ix + 3 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011173 if (((utf[ix+1] & 0xc0) != 0x80) ||
11174 ((utf[ix+2] & 0xc0) != 0x80))
11175 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011176 codepoint = (utf[ix] & 0xf) << 12;
11177 codepoint |= (utf[ix+1] & 0x3f) << 6;
11178 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011179 if (!xmlIsCharQ(codepoint))
11180 return(-ix);
11181 ix += 3;
11182 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011183 if (ix + 4 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011184 if (((utf[ix+1] & 0xc0) != 0x80) ||
11185 ((utf[ix+2] & 0xc0) != 0x80) ||
11186 ((utf[ix+3] & 0xc0) != 0x80))
11187 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011188 codepoint = (utf[ix] & 0x7) << 18;
11189 codepoint |= (utf[ix+1] & 0x3f) << 12;
11190 codepoint |= (utf[ix+2] & 0x3f) << 6;
11191 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011192 if (!xmlIsCharQ(codepoint))
11193 return(-ix);
11194 ix += 4;
11195 } else /* unknown encoding */
11196 return(-ix);
11197 }
11198 return(ix);
11199}
11200
11201/**
Owen Taylor3473f882001-02-23 17:55:21 +000011202 * xmlParseTryOrFinish:
11203 * @ctxt: an XML parser context
11204 * @terminate: last chunk indicator
11205 *
11206 * Try to progress on parsing
11207 *
11208 * Returns zero if no parsing was possible
11209 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011210static int
Owen Taylor3473f882001-02-23 17:55:21 +000011211xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11212 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011213 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011214 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011215 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011216
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011217 if (ctxt->input == NULL)
11218 return(0);
11219
Owen Taylor3473f882001-02-23 17:55:21 +000011220#ifdef DEBUG_PUSH
11221 switch (ctxt->instate) {
11222 case XML_PARSER_EOF:
11223 xmlGenericError(xmlGenericErrorContext,
11224 "PP: try EOF\n"); break;
11225 case XML_PARSER_START:
11226 xmlGenericError(xmlGenericErrorContext,
11227 "PP: try START\n"); break;
11228 case XML_PARSER_MISC:
11229 xmlGenericError(xmlGenericErrorContext,
11230 "PP: try MISC\n");break;
11231 case XML_PARSER_COMMENT:
11232 xmlGenericError(xmlGenericErrorContext,
11233 "PP: try COMMENT\n");break;
11234 case XML_PARSER_PROLOG:
11235 xmlGenericError(xmlGenericErrorContext,
11236 "PP: try PROLOG\n");break;
11237 case XML_PARSER_START_TAG:
11238 xmlGenericError(xmlGenericErrorContext,
11239 "PP: try START_TAG\n");break;
11240 case XML_PARSER_CONTENT:
11241 xmlGenericError(xmlGenericErrorContext,
11242 "PP: try CONTENT\n");break;
11243 case XML_PARSER_CDATA_SECTION:
11244 xmlGenericError(xmlGenericErrorContext,
11245 "PP: try CDATA_SECTION\n");break;
11246 case XML_PARSER_END_TAG:
11247 xmlGenericError(xmlGenericErrorContext,
11248 "PP: try END_TAG\n");break;
11249 case XML_PARSER_ENTITY_DECL:
11250 xmlGenericError(xmlGenericErrorContext,
11251 "PP: try ENTITY_DECL\n");break;
11252 case XML_PARSER_ENTITY_VALUE:
11253 xmlGenericError(xmlGenericErrorContext,
11254 "PP: try ENTITY_VALUE\n");break;
11255 case XML_PARSER_ATTRIBUTE_VALUE:
11256 xmlGenericError(xmlGenericErrorContext,
11257 "PP: try ATTRIBUTE_VALUE\n");break;
11258 case XML_PARSER_DTD:
11259 xmlGenericError(xmlGenericErrorContext,
11260 "PP: try DTD\n");break;
11261 case XML_PARSER_EPILOG:
11262 xmlGenericError(xmlGenericErrorContext,
11263 "PP: try EPILOG\n");break;
11264 case XML_PARSER_PI:
11265 xmlGenericError(xmlGenericErrorContext,
11266 "PP: try PI\n");break;
11267 case XML_PARSER_IGNORE:
11268 xmlGenericError(xmlGenericErrorContext,
11269 "PP: try IGNORE\n");break;
11270 }
11271#endif
11272
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011273 if ((ctxt->input != NULL) &&
11274 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011275 xmlSHRINK(ctxt);
11276 ctxt->checkIndex = 0;
11277 }
11278 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011279
Daniel Veillarde50ba812013-04-11 15:54:51 +080011280 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011281 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011282 return(0);
11283
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011284 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011285 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011286 avail = ctxt->input->length -
11287 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011288 else {
11289 /*
11290 * If we are operating on converted input, try to flush
Haibo Huangcfd91dc2020-07-30 23:01:33 -070011291 * remaining chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011292 * buffer. But do not do this in document start where
11293 * encoding="..." may not have been read and we work on a
11294 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011295 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011296 if ((ctxt->instate != XML_PARSER_START) &&
11297 (ctxt->input->buf->raw != NULL) &&
11298 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011299 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11300 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011301 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011302
11303 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011304 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11305 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011306 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011307 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011308 (ctxt->input->cur - ctxt->input->base);
11309 }
Owen Taylor3473f882001-02-23 17:55:21 +000011310 if (avail < 1)
11311 goto done;
11312 switch (ctxt->instate) {
11313 case XML_PARSER_EOF:
11314 /*
11315 * Document parsing is done !
11316 */
11317 goto done;
11318 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011319 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11320 xmlChar start[4];
11321 xmlCharEncoding enc;
11322
11323 /*
11324 * Very first chars read from the document flow.
11325 */
11326 if (avail < 4)
11327 goto done;
11328
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011329 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011330 * Get the 4 first bytes and decode the charset
11331 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011332 * plug some encoding conversion routines,
11333 * else xmlSwitchEncoding will set to (default)
11334 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011335 */
11336 start[0] = RAW;
11337 start[1] = NXT(1);
11338 start[2] = NXT(2);
11339 start[3] = NXT(3);
11340 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011341 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011342 break;
11343 }
Owen Taylor3473f882001-02-23 17:55:21 +000011344
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011345 if (avail < 2)
11346 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011347 cur = ctxt->input->cur[0];
11348 next = ctxt->input->cur[1];
11349 if (cur == 0) {
11350 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11351 ctxt->sax->setDocumentLocator(ctxt->userData,
11352 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011353 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011354 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011355#ifdef DEBUG_PUSH
11356 xmlGenericError(xmlGenericErrorContext,
11357 "PP: entering EOF\n");
11358#endif
11359 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11360 ctxt->sax->endDocument(ctxt->userData);
11361 goto done;
11362 }
11363 if ((cur == '<') && (next == '?')) {
11364 /* PI or XML decl */
11365 if (avail < 5) return(ret);
11366 if ((!terminate) &&
11367 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11368 return(ret);
11369 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11370 ctxt->sax->setDocumentLocator(ctxt->userData,
11371 &xmlDefaultSAXLocator);
11372 if ((ctxt->input->cur[2] == 'x') &&
11373 (ctxt->input->cur[3] == 'm') &&
11374 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011375 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011376 ret += 5;
11377#ifdef DEBUG_PUSH
11378 xmlGenericError(xmlGenericErrorContext,
11379 "PP: Parsing XML Decl\n");
11380#endif
11381 xmlParseXMLDecl(ctxt);
11382 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11383 /*
11384 * The XML REC instructs us to stop parsing right
11385 * here
11386 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011387 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011388 return(0);
11389 }
11390 ctxt->standalone = ctxt->input->standalone;
11391 if ((ctxt->encoding == NULL) &&
11392 (ctxt->input->encoding != NULL))
11393 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11394 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11395 (!ctxt->disableSAX))
11396 ctxt->sax->startDocument(ctxt->userData);
11397 ctxt->instate = XML_PARSER_MISC;
11398#ifdef DEBUG_PUSH
11399 xmlGenericError(xmlGenericErrorContext,
11400 "PP: entering MISC\n");
11401#endif
11402 } else {
11403 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11404 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11405 (!ctxt->disableSAX))
11406 ctxt->sax->startDocument(ctxt->userData);
11407 ctxt->instate = XML_PARSER_MISC;
11408#ifdef DEBUG_PUSH
11409 xmlGenericError(xmlGenericErrorContext,
11410 "PP: entering MISC\n");
11411#endif
11412 }
11413 } else {
11414 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11415 ctxt->sax->setDocumentLocator(ctxt->userData,
11416 &xmlDefaultSAXLocator);
11417 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011418 if (ctxt->version == NULL) {
11419 xmlErrMemory(ctxt, NULL);
11420 break;
11421 }
Owen Taylor3473f882001-02-23 17:55:21 +000011422 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11423 (!ctxt->disableSAX))
11424 ctxt->sax->startDocument(ctxt->userData);
11425 ctxt->instate = XML_PARSER_MISC;
11426#ifdef DEBUG_PUSH
11427 xmlGenericError(xmlGenericErrorContext,
11428 "PP: entering MISC\n");
11429#endif
11430 }
11431 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011432 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011433 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011434 const xmlChar *prefix = NULL;
11435 const xmlChar *URI = NULL;
Elliott Hughese54f00d2021-05-13 08:13:46 -070011436 int line = ctxt->input->line;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011437 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011438
11439 if ((avail < 2) && (ctxt->inputNr == 1))
11440 goto done;
11441 cur = ctxt->input->cur[0];
11442 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011443 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011444 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011445 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11446 ctxt->sax->endDocument(ctxt->userData);
11447 goto done;
11448 }
11449 if (!terminate) {
11450 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011451 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011452 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011453 goto done;
11454 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11455 goto done;
11456 }
11457 }
11458 if (ctxt->spaceNr == 0)
11459 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011460 else if (*ctxt->space == -2)
11461 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011462 else
11463 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011464#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011465 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011466#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011467 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011468#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011469 else
11470 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011471#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011472 if (ctxt->instate == XML_PARSER_EOF)
11473 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011474 if (name == NULL) {
11475 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011476 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011477 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11478 ctxt->sax->endDocument(ctxt->userData);
11479 goto done;
11480 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011481#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011482 /*
11483 * [ VC: Root Element Type ]
11484 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011485 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011486 */
11487 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11488 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11489 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011490#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011491
11492 /*
11493 * Check for an Empty Element.
11494 */
11495 if ((RAW == '/') && (NXT(1) == '>')) {
11496 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011497
11498 if (ctxt->sax2) {
11499 if ((ctxt->sax != NULL) &&
11500 (ctxt->sax->endElementNs != NULL) &&
11501 (!ctxt->disableSAX))
11502 ctxt->sax->endElementNs(ctxt->userData, name,
11503 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011504 if (ctxt->nsNr - nsNr > 0)
11505 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011506#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011507 } else {
11508 if ((ctxt->sax != NULL) &&
11509 (ctxt->sax->endElement != NULL) &&
11510 (!ctxt->disableSAX))
11511 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011512#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011513 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011514 if (ctxt->instate == XML_PARSER_EOF)
11515 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011516 spacePop(ctxt);
11517 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011518 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011519 } else {
11520 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011521 }
Daniel Veillard65686452012-07-19 18:25:01 +080011522 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011523 break;
11524 }
11525 if (RAW == '>') {
11526 NEXT;
11527 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011528 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011529 "Couldn't find end of Start Tag %s\n",
11530 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011531 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011532 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011533 }
Elliott Hughese54f00d2021-05-13 08:13:46 -070011534 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011535
Daniel Veillarda880b122003-04-21 21:36:41 +000011536 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011537 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011538 break;
11539 }
11540 case XML_PARSER_CONTENT: {
11541 const xmlChar *test;
11542 unsigned int cons;
11543 if ((avail < 2) && (ctxt->inputNr == 1))
11544 goto done;
11545 cur = ctxt->input->cur[0];
11546 next = ctxt->input->cur[1];
11547
11548 test = CUR_PTR;
11549 cons = ctxt->input->consumed;
11550 if ((cur == '<') && (next == '/')) {
11551 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011552 break;
11553 } else if ((cur == '<') && (next == '?')) {
11554 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011555 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11556 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011557 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011558 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011559 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011560 ctxt->instate = XML_PARSER_CONTENT;
11561 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011562 } else if ((cur == '<') && (next != '!')) {
11563 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011564 break;
11565 } else if ((cur == '<') && (next == '!') &&
11566 (ctxt->input->cur[2] == '-') &&
11567 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011568 int term;
11569
11570 if (avail < 4)
11571 goto done;
11572 ctxt->input->cur += 4;
11573 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11574 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011575 if ((!terminate) && (term < 0)) {
11576 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011577 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011578 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011579 xmlParseComment(ctxt);
11580 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011581 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011582 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11583 (ctxt->input->cur[2] == '[') &&
11584 (ctxt->input->cur[3] == 'C') &&
11585 (ctxt->input->cur[4] == 'D') &&
11586 (ctxt->input->cur[5] == 'A') &&
11587 (ctxt->input->cur[6] == 'T') &&
11588 (ctxt->input->cur[7] == 'A') &&
11589 (ctxt->input->cur[8] == '[')) {
11590 SKIP(9);
11591 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011592 break;
11593 } else if ((cur == '<') && (next == '!') &&
11594 (avail < 9)) {
11595 goto done;
11596 } else if (cur == '&') {
11597 if ((!terminate) &&
11598 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11599 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011600 xmlParseReference(ctxt);
11601 } else {
11602 /* TODO Avoid the extra copy, handle directly !!! */
11603 /*
11604 * Goal of the following test is:
11605 * - minimize calls to the SAX 'character' callback
11606 * when they are mergeable
11607 * - handle an problem for isBlank when we only parse
11608 * a sequence of blank chars and the next one is
11609 * not available to check against '<' presence.
11610 * - tries to homogenize the differences in SAX
11611 * callbacks between the push and pull versions
11612 * of the parser.
11613 */
11614 if ((ctxt->inputNr == 1) &&
11615 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11616 if (!terminate) {
11617 if (ctxt->progressive) {
11618 if ((lastlt == NULL) ||
11619 (ctxt->input->cur > lastlt))
11620 goto done;
11621 } else if (xmlParseLookupSequence(ctxt,
11622 '<', 0, 0) < 0) {
11623 goto done;
11624 }
11625 }
11626 }
11627 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011628 xmlParseCharData(ctxt, 0);
11629 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011630 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011631 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11632 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011633 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011634 break;
11635 }
11636 break;
11637 }
11638 case XML_PARSER_END_TAG:
11639 if (avail < 2)
11640 goto done;
11641 if (!terminate) {
11642 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011643 /* > can be found unescaped in attribute values */
11644 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011645 goto done;
11646 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11647 goto done;
11648 }
11649 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011650 if (ctxt->sax2) {
Elliott Hughese54f00d2021-05-13 08:13:46 -070011651 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011652 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011653 }
11654#ifdef LIBXML_SAX1_ENABLED
11655 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011656 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011657#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011658 if (ctxt->instate == XML_PARSER_EOF) {
11659 /* Nothing */
11660 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011661 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011662 } else {
11663 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011664 }
11665 break;
11666 case XML_PARSER_CDATA_SECTION: {
11667 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011668 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011669 * cdataBlock merge back contiguous callbacks.
11670 */
11671 int base;
11672
11673 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11674 if (base < 0) {
11675 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011676 int tmp;
11677
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011678 tmp = xmlCheckCdataPush(ctxt->input->cur,
David Kilzer4f8606c2016-01-05 13:38:09 -080011679 XML_PARSER_BIG_BUFFER_SIZE, 0);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011680 if (tmp < 0) {
11681 tmp = -tmp;
11682 ctxt->input->cur += tmp;
11683 goto encoding_error;
11684 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011685 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11686 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011687 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011688 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011689 else if (ctxt->sax->characters != NULL)
11690 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011691 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011692 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011693 if (ctxt->instate == XML_PARSER_EOF)
11694 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011695 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011696 ctxt->checkIndex = 0;
11697 }
11698 goto done;
11699 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011700 int tmp;
11701
David Kilzer4f8606c2016-01-05 13:38:09 -080011702 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011703 if ((tmp < 0) || (tmp != base)) {
11704 tmp = -tmp;
11705 ctxt->input->cur += tmp;
11706 goto encoding_error;
11707 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011708 if ((ctxt->sax != NULL) && (base == 0) &&
11709 (ctxt->sax->cdataBlock != NULL) &&
11710 (!ctxt->disableSAX)) {
11711 /*
11712 * Special case to provide identical behaviour
11713 * between pull and push parsers on enpty CDATA
11714 * sections
11715 */
11716 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11717 (!strncmp((const char *)&ctxt->input->cur[-9],
11718 "<![CDATA[", 9)))
11719 ctxt->sax->cdataBlock(ctxt->userData,
11720 BAD_CAST "", 0);
11721 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011722 (!ctxt->disableSAX)) {
11723 if (ctxt->sax->cdataBlock != NULL)
11724 ctxt->sax->cdataBlock(ctxt->userData,
11725 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011726 else if (ctxt->sax->characters != NULL)
11727 ctxt->sax->characters(ctxt->userData,
11728 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011729 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011730 if (ctxt->instate == XML_PARSER_EOF)
11731 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011732 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011733 ctxt->checkIndex = 0;
11734 ctxt->instate = XML_PARSER_CONTENT;
11735#ifdef DEBUG_PUSH
11736 xmlGenericError(xmlGenericErrorContext,
11737 "PP: entering CONTENT\n");
11738#endif
11739 }
11740 break;
11741 }
Owen Taylor3473f882001-02-23 17:55:21 +000011742 case XML_PARSER_MISC:
11743 SKIP_BLANKS;
11744 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011745 avail = ctxt->input->length -
11746 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011747 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011748 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011749 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011750 if (avail < 2)
11751 goto done;
11752 cur = ctxt->input->cur[0];
11753 next = ctxt->input->cur[1];
11754 if ((cur == '<') && (next == '?')) {
11755 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011756 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11757 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011758 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011759 }
Owen Taylor3473f882001-02-23 17:55:21 +000011760#ifdef DEBUG_PUSH
11761 xmlGenericError(xmlGenericErrorContext,
11762 "PP: Parsing PI\n");
11763#endif
11764 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011765 if (ctxt->instate == XML_PARSER_EOF)
11766 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011767 ctxt->instate = XML_PARSER_MISC;
11768 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011769 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011770 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011771 (ctxt->input->cur[2] == '-') &&
11772 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011773 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011774 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11775 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011776 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011777 }
Owen Taylor3473f882001-02-23 17:55:21 +000011778#ifdef DEBUG_PUSH
11779 xmlGenericError(xmlGenericErrorContext,
11780 "PP: Parsing Comment\n");
11781#endif
11782 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011783 if (ctxt->instate == XML_PARSER_EOF)
11784 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011785 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011786 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011787 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011788 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011789 (ctxt->input->cur[2] == 'D') &&
11790 (ctxt->input->cur[3] == 'O') &&
11791 (ctxt->input->cur[4] == 'C') &&
11792 (ctxt->input->cur[5] == 'T') &&
11793 (ctxt->input->cur[6] == 'Y') &&
11794 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011795 (ctxt->input->cur[8] == 'E')) {
11796 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011797 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11798 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011799 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011800 }
Owen Taylor3473f882001-02-23 17:55:21 +000011801#ifdef DEBUG_PUSH
11802 xmlGenericError(xmlGenericErrorContext,
11803 "PP: Parsing internal subset\n");
11804#endif
11805 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011806 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011807 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011808 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011809 if (ctxt->instate == XML_PARSER_EOF)
11810 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011811 if (RAW == '[') {
11812 ctxt->instate = XML_PARSER_DTD;
11813#ifdef DEBUG_PUSH
11814 xmlGenericError(xmlGenericErrorContext,
11815 "PP: entering DTD\n");
11816#endif
11817 } else {
11818 /*
11819 * Create and update the external subset.
11820 */
11821 ctxt->inSubset = 2;
11822 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11823 (ctxt->sax->externalSubset != NULL))
11824 ctxt->sax->externalSubset(ctxt->userData,
11825 ctxt->intSubName, ctxt->extSubSystem,
11826 ctxt->extSubURI);
11827 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011828 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011829 ctxt->instate = XML_PARSER_PROLOG;
11830#ifdef DEBUG_PUSH
11831 xmlGenericError(xmlGenericErrorContext,
11832 "PP: entering PROLOG\n");
11833#endif
11834 }
11835 } else if ((cur == '<') && (next == '!') &&
11836 (avail < 9)) {
11837 goto done;
11838 } else {
11839 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011840 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011841 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011842#ifdef DEBUG_PUSH
11843 xmlGenericError(xmlGenericErrorContext,
11844 "PP: entering START_TAG\n");
11845#endif
11846 }
11847 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011848 case XML_PARSER_PROLOG:
11849 SKIP_BLANKS;
11850 if (ctxt->input->buf == NULL)
11851 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11852 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011853 avail = xmlBufUse(ctxt->input->buf->buffer) -
11854 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011855 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011856 goto done;
11857 cur = ctxt->input->cur[0];
11858 next = ctxt->input->cur[1];
11859 if ((cur == '<') && (next == '?')) {
11860 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011861 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11862 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011863 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011864 }
Owen Taylor3473f882001-02-23 17:55:21 +000011865#ifdef DEBUG_PUSH
11866 xmlGenericError(xmlGenericErrorContext,
11867 "PP: Parsing PI\n");
11868#endif
11869 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011870 if (ctxt->instate == XML_PARSER_EOF)
11871 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011872 ctxt->instate = XML_PARSER_PROLOG;
11873 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011874 } else if ((cur == '<') && (next == '!') &&
11875 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11876 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011877 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11878 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011879 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011880 }
Owen Taylor3473f882001-02-23 17:55:21 +000011881#ifdef DEBUG_PUSH
11882 xmlGenericError(xmlGenericErrorContext,
11883 "PP: Parsing Comment\n");
11884#endif
11885 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011886 if (ctxt->instate == XML_PARSER_EOF)
11887 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011888 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011889 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011890 } else if ((cur == '<') && (next == '!') &&
11891 (avail < 4)) {
11892 goto done;
11893 } else {
11894 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011895 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011896 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011897 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011898#ifdef DEBUG_PUSH
11899 xmlGenericError(xmlGenericErrorContext,
11900 "PP: entering START_TAG\n");
11901#endif
11902 }
11903 break;
11904 case XML_PARSER_EPILOG:
11905 SKIP_BLANKS;
11906 if (ctxt->input->buf == NULL)
11907 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11908 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011909 avail = xmlBufUse(ctxt->input->buf->buffer) -
11910 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011911 if (avail < 2)
11912 goto done;
11913 cur = ctxt->input->cur[0];
11914 next = ctxt->input->cur[1];
11915 if ((cur == '<') && (next == '?')) {
11916 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011917 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11918 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011919 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011920 }
Owen Taylor3473f882001-02-23 17:55:21 +000011921#ifdef DEBUG_PUSH
11922 xmlGenericError(xmlGenericErrorContext,
11923 "PP: Parsing PI\n");
11924#endif
11925 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011926 if (ctxt->instate == XML_PARSER_EOF)
11927 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011928 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011929 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011930 } else if ((cur == '<') && (next == '!') &&
11931 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11932 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011933 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11934 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011935 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011936 }
Owen Taylor3473f882001-02-23 17:55:21 +000011937#ifdef DEBUG_PUSH
11938 xmlGenericError(xmlGenericErrorContext,
11939 "PP: Parsing Comment\n");
11940#endif
11941 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011942 if (ctxt->instate == XML_PARSER_EOF)
11943 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011944 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011945 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011946 } else if ((cur == '<') && (next == '!') &&
11947 (avail < 4)) {
11948 goto done;
11949 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011950 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011951 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011952#ifdef DEBUG_PUSH
11953 xmlGenericError(xmlGenericErrorContext,
11954 "PP: entering EOF\n");
11955#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011956 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011957 ctxt->sax->endDocument(ctxt->userData);
11958 goto done;
11959 }
11960 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011961 case XML_PARSER_DTD: {
11962 /*
11963 * Sorry but progressive parsing of the internal subset
11964 * is not expected to be supported. We first check that
11965 * the full content of the internal subset is available and
11966 * the parsing is launched only at that point.
11967 * Internal subset ends up with "']' S? '>'" in an unescaped
11968 * section and not in a ']]>' sequence which are conditional
11969 * sections (whoever argued to keep that crap in XML deserve
11970 * a place in hell !).
11971 */
11972 int base, i;
11973 xmlChar *buf;
11974 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011975 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011976
11977 base = ctxt->input->cur - ctxt->input->base;
11978 if (base < 0) return(0);
11979 if (ctxt->checkIndex > base)
11980 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011981 buf = xmlBufContent(ctxt->input->buf->buffer);
11982 use = xmlBufUse(ctxt->input->buf->buffer);
11983 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011984 if (quote != 0) {
11985 if (buf[base] == quote)
11986 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011987 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011988 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011989 if ((quote == 0) && (buf[base] == '<')) {
11990 int found = 0;
11991 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011992 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011993 (buf[base + 1] == '!') &&
11994 (buf[base + 2] == '-') &&
11995 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011996 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011997 if ((buf[base] == '-') &&
11998 (buf[base + 1] == '-') &&
11999 (buf[base + 2] == '>')) {
12000 found = 1;
12001 base += 2;
12002 break;
12003 }
12004 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012005 if (!found) {
12006#if 0
12007 fprintf(stderr, "unfinished comment\n");
12008#endif
12009 break; /* for */
12010 }
Daniel Veillard036143b2004-02-12 11:57:52 +000012011 continue;
12012 }
12013 }
Owen Taylor3473f882001-02-23 17:55:21 +000012014 if (buf[base] == '"') {
12015 quote = '"';
12016 continue;
12017 }
12018 if (buf[base] == '\'') {
12019 quote = '\'';
12020 continue;
12021 }
12022 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012023#if 0
12024 fprintf(stderr, "%c%c%c%c: ", buf[base],
12025 buf[base + 1], buf[base + 2], buf[base + 3]);
12026#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012027 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000012028 break;
12029 if (buf[base + 1] == ']') {
12030 /* conditional crap, skip both ']' ! */
12031 base++;
12032 continue;
12033 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012034 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012035 if (buf[base + i] == '>') {
12036#if 0
12037 fprintf(stderr, "found\n");
12038#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012039 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012040 }
12041 if (!IS_BLANK_CH(buf[base + i])) {
12042#if 0
12043 fprintf(stderr, "not found\n");
12044#endif
12045 goto not_end_of_int_subset;
12046 }
Owen Taylor3473f882001-02-23 17:55:21 +000012047 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012048#if 0
12049 fprintf(stderr, "end of stream\n");
12050#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012051 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012052
Owen Taylor3473f882001-02-23 17:55:21 +000012053 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012054not_end_of_int_subset:
12055 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000012056 }
12057 /*
12058 * We didn't found the end of the Internal subset
12059 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012060 if (quote == 0)
12061 ctxt->checkIndex = base;
12062 else
12063 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012064#ifdef DEBUG_PUSH
12065 if (next == 0)
12066 xmlGenericError(xmlGenericErrorContext,
12067 "PP: lookup of int subset end filed\n");
12068#endif
12069 goto done;
12070
12071found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012072 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012073 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012074 if (ctxt->instate == XML_PARSER_EOF)
12075 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012076 ctxt->inSubset = 2;
12077 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12078 (ctxt->sax->externalSubset != NULL))
12079 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12080 ctxt->extSubSystem, ctxt->extSubURI);
12081 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000012082 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012083 if (ctxt->instate == XML_PARSER_EOF)
12084 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012085 ctxt->instate = XML_PARSER_PROLOG;
12086 ctxt->checkIndex = 0;
12087#ifdef DEBUG_PUSH
12088 xmlGenericError(xmlGenericErrorContext,
12089 "PP: entering PROLOG\n");
12090#endif
12091 break;
12092 }
12093 case XML_PARSER_COMMENT:
12094 xmlGenericError(xmlGenericErrorContext,
12095 "PP: internal error, state == COMMENT\n");
12096 ctxt->instate = XML_PARSER_CONTENT;
12097#ifdef DEBUG_PUSH
12098 xmlGenericError(xmlGenericErrorContext,
12099 "PP: entering CONTENT\n");
12100#endif
12101 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012102 case XML_PARSER_IGNORE:
12103 xmlGenericError(xmlGenericErrorContext,
12104 "PP: internal error, state == IGNORE");
12105 ctxt->instate = XML_PARSER_DTD;
12106#ifdef DEBUG_PUSH
12107 xmlGenericError(xmlGenericErrorContext,
12108 "PP: entering DTD\n");
12109#endif
12110 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012111 case XML_PARSER_PI:
12112 xmlGenericError(xmlGenericErrorContext,
12113 "PP: internal error, state == PI\n");
12114 ctxt->instate = XML_PARSER_CONTENT;
12115#ifdef DEBUG_PUSH
12116 xmlGenericError(xmlGenericErrorContext,
12117 "PP: entering CONTENT\n");
12118#endif
12119 break;
12120 case XML_PARSER_ENTITY_DECL:
12121 xmlGenericError(xmlGenericErrorContext,
12122 "PP: internal error, state == ENTITY_DECL\n");
12123 ctxt->instate = XML_PARSER_DTD;
12124#ifdef DEBUG_PUSH
12125 xmlGenericError(xmlGenericErrorContext,
12126 "PP: entering DTD\n");
12127#endif
12128 break;
12129 case XML_PARSER_ENTITY_VALUE:
12130 xmlGenericError(xmlGenericErrorContext,
12131 "PP: internal error, state == ENTITY_VALUE\n");
12132 ctxt->instate = XML_PARSER_CONTENT;
12133#ifdef DEBUG_PUSH
12134 xmlGenericError(xmlGenericErrorContext,
12135 "PP: entering DTD\n");
12136#endif
12137 break;
12138 case XML_PARSER_ATTRIBUTE_VALUE:
12139 xmlGenericError(xmlGenericErrorContext,
12140 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12141 ctxt->instate = XML_PARSER_START_TAG;
12142#ifdef DEBUG_PUSH
12143 xmlGenericError(xmlGenericErrorContext,
12144 "PP: entering START_TAG\n");
12145#endif
12146 break;
12147 case XML_PARSER_SYSTEM_LITERAL:
12148 xmlGenericError(xmlGenericErrorContext,
12149 "PP: internal error, state == SYSTEM_LITERAL\n");
12150 ctxt->instate = XML_PARSER_START_TAG;
12151#ifdef DEBUG_PUSH
12152 xmlGenericError(xmlGenericErrorContext,
12153 "PP: entering START_TAG\n");
12154#endif
12155 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012156 case XML_PARSER_PUBLIC_LITERAL:
12157 xmlGenericError(xmlGenericErrorContext,
12158 "PP: internal error, state == PUBLIC_LITERAL\n");
12159 ctxt->instate = XML_PARSER_START_TAG;
12160#ifdef DEBUG_PUSH
12161 xmlGenericError(xmlGenericErrorContext,
12162 "PP: entering START_TAG\n");
12163#endif
12164 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012165 }
12166 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012167done:
Owen Taylor3473f882001-02-23 17:55:21 +000012168#ifdef DEBUG_PUSH
12169 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12170#endif
12171 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012172encoding_error:
Nick Wellnhoferb4dc99e2023-02-18 17:29:07 +010012173 if (ctxt->input->end - ctxt->input->cur < 4) {
12174 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12175 "Input is not proper UTF-8, indicate encoding !\n",
12176 NULL, NULL);
12177 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012178 char buffer[150];
12179
12180 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12181 ctxt->input->cur[0], ctxt->input->cur[1],
12182 ctxt->input->cur[2], ctxt->input->cur[3]);
12183 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12184 "Input is not proper UTF-8, indicate encoding !\n%s",
12185 BAD_CAST buffer, NULL);
12186 }
12187 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012188}
12189
12190/**
Daniel Veillard65686452012-07-19 18:25:01 +080012191 * xmlParseCheckTransition:
12192 * @ctxt: an XML parser context
12193 * @chunk: a char array
12194 * @size: the size in byte of the chunk
12195 *
12196 * Check depending on the current parser state if the chunk given must be
12197 * processed immediately or one need more data to advance on parsing.
12198 *
12199 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12200 */
12201static int
12202xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12203 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12204 return(-1);
12205 if (ctxt->instate == XML_PARSER_START_TAG) {
12206 if (memchr(chunk, '>', size) != NULL)
12207 return(1);
12208 return(0);
12209 }
12210 if (ctxt->progressive == XML_PARSER_COMMENT) {
12211 if (memchr(chunk, '>', size) != NULL)
12212 return(1);
12213 return(0);
12214 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012215 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12216 if (memchr(chunk, '>', size) != NULL)
12217 return(1);
12218 return(0);
12219 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012220 if (ctxt->progressive == XML_PARSER_PI) {
12221 if (memchr(chunk, '>', size) != NULL)
12222 return(1);
12223 return(0);
12224 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012225 if (ctxt->instate == XML_PARSER_END_TAG) {
12226 if (memchr(chunk, '>', size) != NULL)
12227 return(1);
12228 return(0);
12229 }
12230 if ((ctxt->progressive == XML_PARSER_DTD) ||
12231 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012232 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012233 return(1);
12234 return(0);
12235 }
Daniel Veillard65686452012-07-19 18:25:01 +080012236 return(1);
12237}
12238
12239/**
Owen Taylor3473f882001-02-23 17:55:21 +000012240 * xmlParseChunk:
12241 * @ctxt: an XML parser context
12242 * @chunk: an char array
12243 * @size: the size in byte of the chunk
12244 * @terminate: last chunk indicator
12245 *
12246 * Parse a Chunk of memory
12247 *
12248 * Returns zero if no error, the xmlParserErrors otherwise.
12249 */
12250int
12251xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12252 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012253 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012254 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012255 size_t old_avail = 0;
12256 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012257
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012258 if (ctxt == NULL)
12259 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012260 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012261 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012262 if (ctxt->instate == XML_PARSER_EOF)
12263 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012264 if (ctxt->instate == XML_PARSER_START)
12265 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012266 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12267 (chunk[size - 1] == '\r')) {
12268 end_in_lf = 1;
12269 size--;
12270 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012271
12272xmldecl_done:
12273
Owen Taylor3473f882001-02-23 17:55:21 +000012274 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12275 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012276 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12277 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012278 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012279
Daniel Veillard65686452012-07-19 18:25:01 +080012280 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012281 /*
12282 * Specific handling if we autodetected an encoding, we should not
12283 * push more than the first line ... which depend on the encoding
12284 * And only push the rest once the final encoding was detected
12285 */
12286 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12287 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012288 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012289
12290 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12291 BAD_CAST "UTF-16")) ||
12292 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12293 BAD_CAST "UTF16")))
12294 len = 90;
12295 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12296 BAD_CAST "UCS-4")) ||
12297 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12298 BAD_CAST "UCS4")))
12299 len = 180;
12300
12301 if (ctxt->input->buf->rawconsumed < len)
12302 len -= ctxt->input->buf->rawconsumed;
12303
Raul Hudeaba9716a2010-03-15 10:13:29 +010012304 /*
12305 * Change size for reading the initial declaration only
12306 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12307 * will blindly copy extra bytes from memory.
12308 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012309 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012310 remain = size - len;
12311 size = len;
12312 } else {
12313 remain = 0;
12314 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012315 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012316 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012317 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
William M. Bracka3215c72004-07-31 16:24:01 +000012318 if (res < 0) {
12319 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012320 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012321 return (XML_PARSER_EOF);
12322 }
Owen Taylor3473f882001-02-23 17:55:21 +000012323#ifdef DEBUG_PUSH
12324 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12325#endif
12326
Owen Taylor3473f882001-02-23 17:55:21 +000012327 } else if (ctxt->instate != XML_PARSER_EOF) {
12328 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12329 xmlParserInputBufferPtr in = ctxt->input->buf;
12330 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12331 (in->raw != NULL)) {
12332 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012333 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12334 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012335
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012336 nbchars = xmlCharEncInput(in, terminate);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012337 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012338 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012339 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012340 xmlGenericError(xmlGenericErrorContext,
12341 "xmlParseChunk: encoder error\n");
Nick Wellnhoferab362ab2018-01-22 15:40:05 +010012342 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012343 return(XML_ERR_INVALID_ENCODING);
12344 }
12345 }
12346 }
12347 }
Daniel Veillard65686452012-07-19 18:25:01 +080012348 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012349 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012350 } else {
12351 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12352 avail = xmlBufUse(ctxt->input->buf->buffer);
12353 /*
12354 * Depending on the current state it may not be such
12355 * a good idea to try parsing if there is nothing in the chunk
12356 * which would be worth doing a parser state transition and we
12357 * need to wait for more data
12358 */
12359 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12360 (old_avail == 0) || (avail == 0) ||
12361 (xmlParseCheckTransition(ctxt,
12362 (const char *)&ctxt->input->base[old_avail],
12363 avail - old_avail)))
12364 xmlParseTryOrFinish(ctxt, terminate);
12365 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012366 if (ctxt->instate == XML_PARSER_EOF)
12367 return(ctxt->errNo);
12368
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012369 if ((ctxt->input != NULL) &&
12370 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12371 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12372 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12373 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012374 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012375 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012376 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12377 return(ctxt->errNo);
12378
12379 if (remain != 0) {
12380 chunk += size;
12381 size = remain;
12382 remain = 0;
12383 goto xmldecl_done;
12384 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012385 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12386 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012387 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12388 ctxt->input);
12389 size_t current = ctxt->input->cur - ctxt->input->base;
12390
Daniel Veillarda617e242006-01-09 14:38:44 +000012391 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012392
12393 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12394 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012395 }
Owen Taylor3473f882001-02-23 17:55:21 +000012396 if (terminate) {
12397 /*
12398 * Check for termination
12399 */
Daniel Veillard65686452012-07-19 18:25:01 +080012400 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012401
12402 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012403 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012404 cur_avail = ctxt->input->length -
12405 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012406 else
Daniel Veillard65686452012-07-19 18:25:01 +080012407 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12408 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012409 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012410
Owen Taylor3473f882001-02-23 17:55:21 +000012411 if ((ctxt->instate != XML_PARSER_EOF) &&
12412 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012413 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012414 }
Daniel Veillard65686452012-07-19 18:25:01 +080012415 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012416 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012417 }
Owen Taylor3473f882001-02-23 17:55:21 +000012418 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012419 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012420 ctxt->sax->endDocument(ctxt->userData);
12421 }
12422 ctxt->instate = XML_PARSER_EOF;
12423 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012424 if (ctxt->wellFormed == 0)
12425 return((xmlParserErrors) ctxt->errNo);
12426 else
12427 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012428}
12429
12430/************************************************************************
12431 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012432 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012433 * *
12434 ************************************************************************/
12435
12436/**
Owen Taylor3473f882001-02-23 17:55:21 +000012437 * xmlCreatePushParserCtxt:
12438 * @sax: a SAX handler
12439 * @user_data: The user data returned on SAX callbacks
12440 * @chunk: a pointer to an array of chars
12441 * @size: number of chars in the array
12442 * @filename: an optional file name or URI
12443 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012444 * Create a parser context for using the XML parser in push mode.
12445 * If @buffer and @size are non-NULL, the data is used to detect
12446 * the encoding. The remaining characters will be parsed so they
12447 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012448 * To allow content encoding detection, @size should be >= 4
12449 * The value of @filename is used for fetching external entities
12450 * and error/warning reports.
12451 *
12452 * Returns the new parser context or NULL
12453 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012454
Owen Taylor3473f882001-02-23 17:55:21 +000012455xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012456xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012457 const char *chunk, int size, const char *filename) {
12458 xmlParserCtxtPtr ctxt;
12459 xmlParserInputPtr inputStream;
12460 xmlParserInputBufferPtr buf;
12461 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12462
12463 /*
12464 * plug some encoding conversion routines
12465 */
12466 if ((chunk != NULL) && (size >= 4))
12467 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12468
12469 buf = xmlAllocParserInputBuffer(enc);
12470 if (buf == NULL) return(NULL);
12471
12472 ctxt = xmlNewParserCtxt();
12473 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012474 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012475 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012476 return(NULL);
12477 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012478 ctxt->dictNames = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012479 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012480#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012481 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012482#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012483 xmlFree(ctxt->sax);
12484 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12485 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012486 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012487 xmlFreeParserInputBuffer(buf);
12488 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012489 return(NULL);
12490 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012491 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12492 if (sax->initialized == XML_SAX2_MAGIC)
12493 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12494 else
12495 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012496 if (user_data != NULL)
12497 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012498 }
Owen Taylor3473f882001-02-23 17:55:21 +000012499 if (filename == NULL) {
12500 ctxt->directory = NULL;
12501 } else {
12502 ctxt->directory = xmlParserGetDirectory(filename);
12503 }
12504
12505 inputStream = xmlNewInputStream(ctxt);
12506 if (inputStream == NULL) {
12507 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012508 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012509 return(NULL);
12510 }
12511
12512 if (filename == NULL)
12513 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012514 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012515 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012516 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012517 if (inputStream->filename == NULL) {
12518 xmlFreeParserCtxt(ctxt);
12519 xmlFreeParserInputBuffer(buf);
12520 return(NULL);
12521 }
12522 }
Owen Taylor3473f882001-02-23 17:55:21 +000012523 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012524 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012525 inputPush(ctxt, inputStream);
12526
William M. Brack3a1cd212005-02-11 14:35:54 +000012527 /*
12528 * If the caller didn't provide an initial 'chunk' for determining
12529 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12530 * that it can be automatically determined later
12531 */
12532 if ((size == 0) || (chunk == NULL)) {
12533 ctxt->charset = XML_CHAR_ENCODING_NONE;
12534 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012535 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12536 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012537
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012538 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012539
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012540 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012541#ifdef DEBUG_PUSH
12542 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12543#endif
12544 }
12545
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012546 if (enc != XML_CHAR_ENCODING_NONE) {
12547 xmlSwitchEncoding(ctxt, enc);
12548 }
12549
Owen Taylor3473f882001-02-23 17:55:21 +000012550 return(ctxt);
12551}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012552#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012553
12554/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012555 * xmlHaltParser:
12556 * @ctxt: an XML parser context
12557 *
12558 * Blocks further parser processing don't override error
12559 * for internal use
12560 */
12561static void
12562xmlHaltParser(xmlParserCtxtPtr ctxt) {
12563 if (ctxt == NULL)
12564 return;
12565 ctxt->instate = XML_PARSER_EOF;
12566 ctxt->disableSAX = 1;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012567 while (ctxt->inputNr > 1)
12568 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012569 if (ctxt->input != NULL) {
12570 /*
12571 * in case there was a specific allocation deallocate before
12572 * overriding base
12573 */
12574 if (ctxt->input->free != NULL) {
12575 ctxt->input->free((xmlChar *) ctxt->input->base);
12576 ctxt->input->free = NULL;
12577 }
Elliott Hughes7fbecab2019-01-10 16:42:03 -080012578 if (ctxt->input->buf != NULL) {
12579 xmlFreeParserInputBuffer(ctxt->input->buf);
12580 ctxt->input->buf = NULL;
12581 }
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012582 ctxt->input->cur = BAD_CAST"";
Elliott Hughes7fbecab2019-01-10 16:42:03 -080012583 ctxt->input->length = 0;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012584 ctxt->input->base = ctxt->input->cur;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012585 ctxt->input->end = ctxt->input->cur;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012586 }
12587}
12588
12589/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012590 * xmlStopParser:
12591 * @ctxt: an XML parser context
12592 *
12593 * Blocks further parser processing
12594 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012595void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012596xmlStopParser(xmlParserCtxtPtr ctxt) {
12597 if (ctxt == NULL)
12598 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012599 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012600 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012601}
12602
12603/**
Owen Taylor3473f882001-02-23 17:55:21 +000012604 * xmlCreateIOParserCtxt:
12605 * @sax: a SAX handler
12606 * @user_data: The user data returned on SAX callbacks
12607 * @ioread: an I/O read function
12608 * @ioclose: an I/O close function
12609 * @ioctx: an I/O handler
12610 * @enc: the charset encoding if known
12611 *
12612 * Create a parser context for using the XML parser with an existing
12613 * I/O stream
12614 *
12615 * Returns the new parser context or NULL
12616 */
12617xmlParserCtxtPtr
12618xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12619 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12620 void *ioctx, xmlCharEncoding enc) {
12621 xmlParserCtxtPtr ctxt;
12622 xmlParserInputPtr inputStream;
12623 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012624
Daniel Veillard42595322004-11-08 10:52:06 +000012625 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012626
12627 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012628 if (buf == NULL) {
12629 if (ioclose != NULL)
12630 ioclose(ioctx);
12631 return (NULL);
12632 }
Owen Taylor3473f882001-02-23 17:55:21 +000012633
12634 ctxt = xmlNewParserCtxt();
12635 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012636 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012637 return(NULL);
12638 }
12639 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012640#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012641 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012642#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012643 xmlFree(ctxt->sax);
12644 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12645 if (ctxt->sax == NULL) {
Elliott Hughesecdab2a2022-02-23 14:33:50 -080012646 xmlFreeParserInputBuffer(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012647 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012648 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012649 return(NULL);
12650 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012651 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12652 if (sax->initialized == XML_SAX2_MAGIC)
12653 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12654 else
12655 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012656 if (user_data != NULL)
12657 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012658 }
Owen Taylor3473f882001-02-23 17:55:21 +000012659
12660 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12661 if (inputStream == NULL) {
12662 xmlFreeParserCtxt(ctxt);
12663 return(NULL);
12664 }
12665 inputPush(ctxt, inputStream);
12666
12667 return(ctxt);
12668}
12669
Daniel Veillard4432df22003-09-28 18:58:27 +000012670#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012671/************************************************************************
12672 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012673 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012674 * *
12675 ************************************************************************/
12676
12677/**
12678 * xmlIOParseDTD:
12679 * @sax: the SAX handler block or NULL
12680 * @input: an Input Buffer
12681 * @enc: the charset encoding if known
12682 *
12683 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012684 *
Owen Taylor3473f882001-02-23 17:55:21 +000012685 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012686 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012687 */
12688
12689xmlDtdPtr
12690xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12691 xmlCharEncoding enc) {
12692 xmlDtdPtr ret = NULL;
12693 xmlParserCtxtPtr ctxt;
12694 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012695 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012696
12697 if (input == NULL)
12698 return(NULL);
12699
12700 ctxt = xmlNewParserCtxt();
12701 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012702 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012703 return(NULL);
12704 }
12705
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012706 /* We are loading a DTD */
12707 ctxt->options |= XML_PARSE_DTDLOAD;
12708
Owen Taylor3473f882001-02-23 17:55:21 +000012709 /*
12710 * Set-up the SAX context
12711 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012712 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012713 if (ctxt->sax != NULL)
12714 xmlFree(ctxt->sax);
12715 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012716 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012717 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012718 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012719
12720 /*
12721 * generate a parser input from the I/O handler
12722 */
12723
Daniel Veillard43caefb2003-12-07 19:32:22 +000012724 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012725 if (pinput == NULL) {
12726 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012727 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012728 xmlFreeParserCtxt(ctxt);
12729 return(NULL);
12730 }
12731
12732 /*
12733 * plug some encoding conversion routines here.
12734 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012735 if (xmlPushInput(ctxt, pinput) < 0) {
12736 if (sax != NULL) ctxt->sax = NULL;
12737 xmlFreeParserCtxt(ctxt);
12738 return(NULL);
12739 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012740 if (enc != XML_CHAR_ENCODING_NONE) {
12741 xmlSwitchEncoding(ctxt, enc);
12742 }
Owen Taylor3473f882001-02-23 17:55:21 +000012743
12744 pinput->filename = NULL;
12745 pinput->line = 1;
12746 pinput->col = 1;
12747 pinput->base = ctxt->input->cur;
12748 pinput->cur = ctxt->input->cur;
12749 pinput->free = NULL;
12750
12751 /*
12752 * let's parse that entity knowing it's an external subset.
12753 */
12754 ctxt->inSubset = 2;
12755 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012756 if (ctxt->myDoc == NULL) {
12757 xmlErrMemory(ctxt, "New Doc failed");
12758 return(NULL);
12759 }
12760 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012761 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12762 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012763
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012764 if ((enc == XML_CHAR_ENCODING_NONE) &&
12765 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012766 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012767 * Get the 4 first bytes and decode the charset
12768 * if enc != XML_CHAR_ENCODING_NONE
12769 * plug some encoding conversion routines.
12770 */
12771 start[0] = RAW;
12772 start[1] = NXT(1);
12773 start[2] = NXT(2);
12774 start[3] = NXT(3);
12775 enc = xmlDetectCharEncoding(start, 4);
12776 if (enc != XML_CHAR_ENCODING_NONE) {
12777 xmlSwitchEncoding(ctxt, enc);
12778 }
12779 }
12780
Owen Taylor3473f882001-02-23 17:55:21 +000012781 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12782
12783 if (ctxt->myDoc != NULL) {
12784 if (ctxt->wellFormed) {
12785 ret = ctxt->myDoc->extSubset;
12786 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012787 if (ret != NULL) {
12788 xmlNodePtr tmp;
12789
12790 ret->doc = NULL;
12791 tmp = ret->children;
12792 while (tmp != NULL) {
12793 tmp->doc = NULL;
12794 tmp = tmp->next;
12795 }
12796 }
Owen Taylor3473f882001-02-23 17:55:21 +000012797 } else {
12798 ret = NULL;
12799 }
12800 xmlFreeDoc(ctxt->myDoc);
12801 ctxt->myDoc = NULL;
12802 }
12803 if (sax != NULL) ctxt->sax = NULL;
12804 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012805
Owen Taylor3473f882001-02-23 17:55:21 +000012806 return(ret);
12807}
12808
12809/**
12810 * xmlSAXParseDTD:
12811 * @sax: the SAX handler block
12812 * @ExternalID: a NAME* containing the External ID of the DTD
12813 * @SystemID: a NAME* containing the URL to the DTD
12814 *
12815 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012816 *
Owen Taylor3473f882001-02-23 17:55:21 +000012817 * Returns the resulting xmlDtdPtr or NULL in case of error.
12818 */
12819
12820xmlDtdPtr
12821xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12822 const xmlChar *SystemID) {
12823 xmlDtdPtr ret = NULL;
12824 xmlParserCtxtPtr ctxt;
12825 xmlParserInputPtr input = NULL;
12826 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012827 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012828
12829 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12830
12831 ctxt = xmlNewParserCtxt();
12832 if (ctxt == NULL) {
12833 return(NULL);
12834 }
12835
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012836 /* We are loading a DTD */
12837 ctxt->options |= XML_PARSE_DTDLOAD;
12838
Owen Taylor3473f882001-02-23 17:55:21 +000012839 /*
12840 * Set-up the SAX context
12841 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012842 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012843 if (ctxt->sax != NULL)
12844 xmlFree(ctxt->sax);
12845 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012846 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012847 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012848
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012849 /*
12850 * Canonicalise the system ID
12851 */
12852 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012853 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012854 xmlFreeParserCtxt(ctxt);
12855 return(NULL);
12856 }
Owen Taylor3473f882001-02-23 17:55:21 +000012857
12858 /*
12859 * Ask the Entity resolver to load the damn thing
12860 */
12861
12862 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012863 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12864 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012865 if (input == NULL) {
12866 if (sax != NULL) ctxt->sax = NULL;
12867 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012868 if (systemIdCanonic != NULL)
12869 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012870 return(NULL);
12871 }
12872
12873 /*
12874 * plug some encoding conversion routines here.
12875 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012876 if (xmlPushInput(ctxt, input) < 0) {
12877 if (sax != NULL) ctxt->sax = NULL;
12878 xmlFreeParserCtxt(ctxt);
12879 if (systemIdCanonic != NULL)
12880 xmlFree(systemIdCanonic);
12881 return(NULL);
12882 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012883 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12884 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12885 xmlSwitchEncoding(ctxt, enc);
12886 }
Owen Taylor3473f882001-02-23 17:55:21 +000012887
12888 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012889 input->filename = (char *) systemIdCanonic;
12890 else
12891 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012892 input->line = 1;
12893 input->col = 1;
12894 input->base = ctxt->input->cur;
12895 input->cur = ctxt->input->cur;
12896 input->free = NULL;
12897
12898 /*
12899 * let's parse that entity knowing it's an external subset.
12900 */
12901 ctxt->inSubset = 2;
12902 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012903 if (ctxt->myDoc == NULL) {
12904 xmlErrMemory(ctxt, "New Doc failed");
12905 if (sax != NULL) ctxt->sax = NULL;
12906 xmlFreeParserCtxt(ctxt);
12907 return(NULL);
12908 }
12909 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012910 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12911 ExternalID, SystemID);
12912 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12913
12914 if (ctxt->myDoc != NULL) {
12915 if (ctxt->wellFormed) {
12916 ret = ctxt->myDoc->extSubset;
12917 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012918 if (ret != NULL) {
12919 xmlNodePtr tmp;
12920
12921 ret->doc = NULL;
12922 tmp = ret->children;
12923 while (tmp != NULL) {
12924 tmp->doc = NULL;
12925 tmp = tmp->next;
12926 }
12927 }
Owen Taylor3473f882001-02-23 17:55:21 +000012928 } else {
12929 ret = NULL;
12930 }
12931 xmlFreeDoc(ctxt->myDoc);
12932 ctxt->myDoc = NULL;
12933 }
12934 if (sax != NULL) ctxt->sax = NULL;
12935 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012936
Owen Taylor3473f882001-02-23 17:55:21 +000012937 return(ret);
12938}
12939
Daniel Veillard4432df22003-09-28 18:58:27 +000012940
Owen Taylor3473f882001-02-23 17:55:21 +000012941/**
12942 * xmlParseDTD:
12943 * @ExternalID: a NAME* containing the External ID of the DTD
12944 * @SystemID: a NAME* containing the URL to the DTD
12945 *
12946 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012947 *
Owen Taylor3473f882001-02-23 17:55:21 +000012948 * Returns the resulting xmlDtdPtr or NULL in case of error.
12949 */
12950
12951xmlDtdPtr
12952xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12953 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12954}
Daniel Veillard4432df22003-09-28 18:58:27 +000012955#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012956
12957/************************************************************************
12958 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012959 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012960 * *
12961 ************************************************************************/
12962
12963/**
Owen Taylor3473f882001-02-23 17:55:21 +000012964 * xmlParseCtxtExternalEntity:
12965 * @ctx: the existing parsing context
12966 * @URL: the URL for the entity to load
12967 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012968 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012969 *
12970 * Parse an external general entity within an existing parsing context
12971 * An external general parsed entity is well-formed if it matches the
12972 * production labeled extParsedEnt.
12973 *
12974 * [78] extParsedEnt ::= TextDecl? content
12975 *
12976 * Returns 0 if the entity is well formed, -1 in case of args problem and
12977 * the parser error code otherwise
12978 */
12979
12980int
12981xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012982 const xmlChar *ID, xmlNodePtr *lst) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012983 void *userData;
Owen Taylor3473f882001-02-23 17:55:21 +000012984
Daniel Veillardce682bc2004-11-05 17:22:25 +000012985 if (ctx == NULL) return(-1);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012986 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012987 * If the user provided their own SAX callbacks, then reuse the
12988 * userData callback field, otherwise the expected setup in a
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012989 * DOM builder is to have userData == ctxt
12990 */
12991 if (ctx->userData == ctx)
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012992 userData = NULL;
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012993 else
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012994 userData = ctx->userData;
12995 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12996 userData, ctx->depth + 1,
12997 URL, ID, lst);
Owen Taylor3473f882001-02-23 17:55:21 +000012998}
12999
13000/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013001 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013002 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013003 * @oldctxt: the previous parser context if available
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013004 * @sax: the SAX handler block (possibly NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013005 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13006 * @depth: Used for loop detection, use 0
13007 * @URL: the URL for the entity to load
13008 * @ID: the System ID for the entity to load
13009 * @list: the return value for the set of parsed nodes
13010 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013011 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013012 *
13013 * Returns 0 if the entity is well formed, -1 in case of args problem and
13014 * the parser error code otherwise
13015 */
13016
Daniel Veillard7d515752003-09-26 19:12:37 +000013017static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013018xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13019 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013020 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013021 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013022 xmlParserCtxtPtr ctxt;
13023 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013024 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013025 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013026 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013027 xmlChar start[4];
13028 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013029
Daniel Veillard0161e632008-08-28 15:36:32 +000013030 if (((depth > 40) &&
13031 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13032 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013033 return(XML_ERR_ENTITY_LOOP);
13034 }
13035
Owen Taylor3473f882001-02-23 17:55:21 +000013036 if (list != NULL)
13037 *list = NULL;
13038 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013039 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013040 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013041 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013042
13043
Rob Richards9c0aa472009-03-26 18:10:19 +000013044 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013045 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013046 ctxt->userData = ctxt;
13047 if (sax != NULL) {
13048 oldsax = ctxt->sax;
13049 ctxt->sax = sax;
13050 if (user_data != NULL)
13051 ctxt->userData = user_data;
13052 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013053 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013054 newDoc = xmlNewDoc(BAD_CAST "1.0");
13055 if (newDoc == NULL) {
13056 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013057 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013058 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013059 newDoc->properties = XML_DOC_INTERNAL;
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013060 if (doc) {
13061 newDoc->intSubset = doc->intSubset;
13062 newDoc->extSubset = doc->extSubset;
13063 if (doc->dict) {
13064 newDoc->dict = doc->dict;
13065 xmlDictReference(newDoc->dict);
13066 }
13067 if (doc->URL != NULL) {
13068 newDoc->URL = xmlStrdup(doc->URL);
13069 }
Owen Taylor3473f882001-02-23 17:55:21 +000013070 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013071 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13072 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013073 if (sax != NULL)
13074 ctxt->sax = oldsax;
13075 xmlFreeParserCtxt(ctxt);
13076 newDoc->intSubset = NULL;
13077 newDoc->extSubset = NULL;
13078 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013079 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013080 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013081 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013082 nodePush(ctxt, newDoc->children);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013083 if (doc == NULL) {
13084 ctxt->myDoc = newDoc;
13085 } else {
13086 ctxt->myDoc = doc;
13087 newRoot->doc = doc;
13088 }
Owen Taylor3473f882001-02-23 17:55:21 +000013089
Daniel Veillard0161e632008-08-28 15:36:32 +000013090 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013091 * Get the 4 first bytes and decode the charset
13092 * if enc != XML_CHAR_ENCODING_NONE
13093 * plug some encoding conversion routines.
13094 */
13095 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013096 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13097 start[0] = RAW;
13098 start[1] = NXT(1);
13099 start[2] = NXT(2);
13100 start[3] = NXT(3);
13101 enc = xmlDetectCharEncoding(start, 4);
13102 if (enc != XML_CHAR_ENCODING_NONE) {
13103 xmlSwitchEncoding(ctxt, enc);
13104 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013105 }
13106
Owen Taylor3473f882001-02-23 17:55:21 +000013107 /*
13108 * Parse a possible text declaration first
13109 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013110 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013111 xmlParseTextDecl(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013112 /*
13113 * An XML-1.0 document can't reference an entity not XML-1.0
13114 */
13115 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13116 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13117 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13118 "Version mismatch between document and entity\n");
13119 }
Owen Taylor3473f882001-02-23 17:55:21 +000013120 }
13121
Owen Taylor3473f882001-02-23 17:55:21 +000013122 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013123 ctxt->depth = depth;
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013124 if (oldctxt != NULL) {
13125 ctxt->_private = oldctxt->_private;
13126 ctxt->loadsubset = oldctxt->loadsubset;
13127 ctxt->validate = oldctxt->validate;
13128 ctxt->valid = oldctxt->valid;
13129 ctxt->replaceEntities = oldctxt->replaceEntities;
13130 if (oldctxt->validate) {
13131 ctxt->vctxt.error = oldctxt->vctxt.error;
13132 ctxt->vctxt.warning = oldctxt->vctxt.warning;
13133 ctxt->vctxt.userData = oldctxt->vctxt.userData;
13134 }
13135 ctxt->external = oldctxt->external;
13136 if (ctxt->dict) xmlDictFree(ctxt->dict);
13137 ctxt->dict = oldctxt->dict;
13138 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13139 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13140 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13141 ctxt->dictNames = oldctxt->dictNames;
13142 ctxt->attsDefault = oldctxt->attsDefault;
13143 ctxt->attsSpecial = oldctxt->attsSpecial;
13144 ctxt->linenumbers = oldctxt->linenumbers;
13145 ctxt->record_info = oldctxt->record_info;
13146 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13147 ctxt->node_seq.length = oldctxt->node_seq.length;
13148 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13149 } else {
13150 /*
13151 * Doing validity checking on chunk without context
13152 * doesn't make sense
13153 */
13154 ctxt->_private = NULL;
13155 ctxt->validate = 0;
13156 ctxt->external = 2;
13157 ctxt->loadsubset = 0;
13158 }
Owen Taylor3473f882001-02-23 17:55:21 +000013159
13160 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013161
Daniel Veillard561b7f82002-03-20 21:55:57 +000013162 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013163 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013164 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013165 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013166 }
13167 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013168 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013169 }
13170
13171 if (!ctxt->wellFormed) {
13172 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013173 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013174 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013175 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013176 } else {
13177 if (list != NULL) {
13178 xmlNodePtr cur;
13179
13180 /*
13181 * Return the newly created nodeset after unlinking it from
13182 * they pseudo parent.
13183 */
13184 cur = newDoc->children->children;
13185 *list = cur;
13186 while (cur != NULL) {
13187 cur->parent = NULL;
13188 cur = cur->next;
13189 }
13190 newDoc->children->children = NULL;
13191 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013192 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013193 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013194
13195 /*
13196 * Record in the parent context the number of entities replacement
13197 * done when parsing that reference.
13198 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013199 if (oldctxt != NULL)
13200 oldctxt->nbentities += ctxt->nbentities;
13201
Daniel Veillard0161e632008-08-28 15:36:32 +000013202 /*
13203 * Also record the size of the entity parsed
13204 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013205 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013206 oldctxt->sizeentities += ctxt->input->consumed;
13207 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13208 }
13209 /*
13210 * And record the last error if any
13211 */
Nick Wellnhofer3eef3f32017-06-20 16:13:57 +020013212 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
Daniel Veillard0161e632008-08-28 15:36:32 +000013213 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13214
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013215 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013216 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013217 if (oldctxt != NULL) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013218 ctxt->dict = NULL;
13219 ctxt->attsDefault = NULL;
13220 ctxt->attsSpecial = NULL;
13221 oldctxt->validate = ctxt->validate;
13222 oldctxt->valid = ctxt->valid;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013223 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13224 oldctxt->node_seq.length = ctxt->node_seq.length;
13225 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13226 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013227 ctxt->node_seq.maximum = 0;
13228 ctxt->node_seq.length = 0;
13229 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013230 xmlFreeParserCtxt(ctxt);
13231 newDoc->intSubset = NULL;
13232 newDoc->extSubset = NULL;
13233 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013234
Owen Taylor3473f882001-02-23 17:55:21 +000013235 return(ret);
13236}
13237
Daniel Veillard81273902003-09-30 00:43:48 +000013238#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013239/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013240 * xmlParseExternalEntity:
13241 * @doc: the document the chunk pertains to
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013242 * @sax: the SAX handler block (possibly NULL)
Daniel Veillard257d9102001-05-08 10:41:44 +000013243 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13244 * @depth: Used for loop detection, use 0
13245 * @URL: the URL for the entity to load
13246 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013247 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013248 *
13249 * Parse an external general entity
13250 * An external general parsed entity is well-formed if it matches the
13251 * production labeled extParsedEnt.
13252 *
13253 * [78] extParsedEnt ::= TextDecl? content
13254 *
13255 * Returns 0 if the entity is well formed, -1 in case of args problem and
13256 * the parser error code otherwise
13257 */
13258
13259int
13260xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013261 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013262 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013263 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013264}
13265
13266/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013267 * xmlParseBalancedChunkMemory:
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013268 * @doc: the document the chunk pertains to (must not be NULL)
13269 * @sax: the SAX handler block (possibly NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013270 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13271 * @depth: Used for loop detection, use 0
13272 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013273 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013274 *
13275 * Parse a well-balanced chunk of an XML document
13276 * called by the parser
13277 * The allowed sequence for the Well Balanced Chunk is the one defined by
13278 * the content production in the XML grammar:
13279 *
13280 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13281 *
13282 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13283 * the parser error code otherwise
13284 */
13285
13286int
13287xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013288 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013289 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13290 depth, string, lst, 0 );
13291}
Daniel Veillard81273902003-09-30 00:43:48 +000013292#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013293
13294/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013295 * xmlParseBalancedChunkMemoryInternal:
13296 * @oldctxt: the existing parsing context
13297 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13298 * @user_data: the user data field for the parser context
13299 * @lst: the return value for the set of parsed nodes
13300 *
13301 *
13302 * Parse a well-balanced chunk of an XML document
13303 * called by the parser
13304 * The allowed sequence for the Well Balanced Chunk is the one defined by
13305 * the content production in the XML grammar:
13306 *
13307 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13308 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013309 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13310 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013311 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013312 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013313 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013314 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013315static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013316xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13317 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13318 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013319 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013320 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013321 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013322 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013323 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013324 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013325 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013326#ifdef SAX2
13327 int i;
13328#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013329
Daniel Veillard0161e632008-08-28 15:36:32 +000013330 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13331 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013332 return(XML_ERR_ENTITY_LOOP);
13333 }
13334
13335
13336 if (lst != NULL)
13337 *lst = NULL;
13338 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013339 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013340
13341 size = xmlStrlen(string);
13342
13343 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013344 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013345 if (user_data != NULL)
13346 ctxt->userData = user_data;
13347 else
13348 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013349 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13350 ctxt->dict = oldctxt->dict;
Daniel Veillardad88b542017-12-08 09:42:31 +010013351 ctxt->input_id = oldctxt->input_id + 1;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013352 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13353 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13354 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013355
Daniel Veillard74eaec12009-08-26 15:57:20 +020013356#ifdef SAX2
13357 /* propagate namespaces down the entity */
13358 for (i = 0;i < oldctxt->nsNr;i += 2) {
13359 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13360 }
13361#endif
13362
Daniel Veillard328f48c2002-11-15 15:24:34 +000013363 oldsax = ctxt->sax;
13364 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013365 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013366 ctxt->replaceEntities = oldctxt->replaceEntities;
13367 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013368
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013369 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013370 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013371 newDoc = xmlNewDoc(BAD_CAST "1.0");
13372 if (newDoc == NULL) {
13373 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013374 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013375 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013376 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013377 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013378 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013379 newDoc->dict = ctxt->dict;
13380 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013381 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013382 } else {
13383 ctxt->myDoc = oldctxt->myDoc;
13384 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013385 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013386 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013387 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13388 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013389 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013390 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013391 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013392 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013393 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013394 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013395 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013396 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013397 ctxt->myDoc->children = NULL;
13398 ctxt->myDoc->last = NULL;
13399 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013400 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013401 ctxt->instate = XML_PARSER_CONTENT;
13402 ctxt->depth = oldctxt->depth + 1;
13403
Daniel Veillard328f48c2002-11-15 15:24:34 +000013404 ctxt->validate = 0;
13405 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013406 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13407 /*
13408 * ID/IDREF registration will be done in xmlValidateElement below
13409 */
13410 ctxt->loadsubset |= XML_SKIP_IDS;
13411 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013412 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013413 ctxt->attsDefault = oldctxt->attsDefault;
13414 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013415
Daniel Veillard68e9e742002-11-16 15:35:11 +000013416 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013417 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013418 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013419 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013420 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013421 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013422 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013423 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013424 }
13425
13426 if (!ctxt->wellFormed) {
13427 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013428 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013429 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013430 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013431 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013432 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013433 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013434
William M. Brack7b9154b2003-09-27 19:23:50 +000013435 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013436 xmlNodePtr cur;
13437
13438 /*
13439 * Return the newly created nodeset after unlinking it from
13440 * they pseudo parent.
13441 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013442 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013443 *lst = cur;
13444 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013445#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013446 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13447 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13448 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013449 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13450 oldctxt->myDoc, cur);
13451 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013452#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013453 cur->parent = NULL;
13454 cur = cur->next;
13455 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013456 ctxt->myDoc->children->children = NULL;
13457 }
13458 if (ctxt->myDoc != NULL) {
13459 xmlFreeNode(ctxt->myDoc->children);
13460 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013461 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013462 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013463
13464 /*
13465 * Record in the parent context the number of entities replacement
13466 * done when parsing that reference.
13467 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013468 if (oldctxt != NULL)
13469 oldctxt->nbentities += ctxt->nbentities;
13470
Daniel Veillard0161e632008-08-28 15:36:32 +000013471 /*
13472 * Also record the last error if any
13473 */
13474 if (ctxt->lastError.code != XML_ERR_OK)
13475 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13476
Daniel Veillard328f48c2002-11-15 15:24:34 +000013477 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013478 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013479 ctxt->attsDefault = NULL;
13480 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013481 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013482 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013483 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013484 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013485
Daniel Veillard328f48c2002-11-15 15:24:34 +000013486 return(ret);
13487}
13488
Daniel Veillard29b17482004-08-16 00:39:03 +000013489/**
13490 * xmlParseInNodeContext:
13491 * @node: the context node
13492 * @data: the input string
13493 * @datalen: the input string length in bytes
13494 * @options: a combination of xmlParserOption
13495 * @lst: the return value for the set of parsed nodes
13496 *
13497 * Parse a well-balanced chunk of an XML document
13498 * within the context (DTD, namespaces, etc ...) of the given node.
13499 *
13500 * The allowed sequence for the data is a Well Balanced Chunk defined by
13501 * the content production in the XML grammar:
13502 *
13503 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13504 *
13505 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13506 * error code otherwise
13507 */
13508xmlParserErrors
13509xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13510 int options, xmlNodePtr *lst) {
13511#ifdef SAX2
13512 xmlParserCtxtPtr ctxt;
13513 xmlDocPtr doc = NULL;
13514 xmlNodePtr fake, cur;
13515 int nsnr = 0;
13516
13517 xmlParserErrors ret = XML_ERR_OK;
13518
13519 /*
13520 * check all input parameters, grab the document
13521 */
13522 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13523 return(XML_ERR_INTERNAL_ERROR);
13524 switch (node->type) {
13525 case XML_ELEMENT_NODE:
13526 case XML_ATTRIBUTE_NODE:
13527 case XML_TEXT_NODE:
13528 case XML_CDATA_SECTION_NODE:
13529 case XML_ENTITY_REF_NODE:
13530 case XML_PI_NODE:
13531 case XML_COMMENT_NODE:
13532 case XML_DOCUMENT_NODE:
13533 case XML_HTML_DOCUMENT_NODE:
13534 break;
13535 default:
13536 return(XML_ERR_INTERNAL_ERROR);
13537
13538 }
13539 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13540 (node->type != XML_DOCUMENT_NODE) &&
13541 (node->type != XML_HTML_DOCUMENT_NODE))
13542 node = node->parent;
13543 if (node == NULL)
13544 return(XML_ERR_INTERNAL_ERROR);
13545 if (node->type == XML_ELEMENT_NODE)
13546 doc = node->doc;
13547 else
13548 doc = (xmlDocPtr) node;
13549 if (doc == NULL)
13550 return(XML_ERR_INTERNAL_ERROR);
13551
13552 /*
13553 * allocate a context and set-up everything not related to the
13554 * node position in the tree
13555 */
13556 if (doc->type == XML_DOCUMENT_NODE)
13557 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13558#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013559 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013560 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013561 /*
13562 * When parsing in context, it makes no sense to add implied
13563 * elements like html/body/etc...
13564 */
13565 options |= HTML_PARSE_NOIMPLIED;
13566 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013567#endif
13568 else
13569 return(XML_ERR_INTERNAL_ERROR);
13570
13571 if (ctxt == NULL)
13572 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013573
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013574 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013575 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13576 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13577 * we must wait until the last moment to free the original one.
13578 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013579 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013580 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013581 xmlDictFree(ctxt->dict);
13582 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013583 } else
13584 options |= XML_PARSE_NODICT;
13585
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013586 if (doc->encoding != NULL) {
13587 xmlCharEncodingHandlerPtr hdlr;
13588
13589 if (ctxt->encoding != NULL)
13590 xmlFree((xmlChar *) ctxt->encoding);
13591 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13592
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013593 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013594 if (hdlr != NULL) {
13595 xmlSwitchToEncoding(ctxt, hdlr);
13596 } else {
13597 return(XML_ERR_UNSUPPORTED_ENCODING);
13598 }
13599 }
13600
Daniel Veillard37334572008-07-31 08:20:02 +000013601 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013602 xmlDetectSAX2(ctxt);
13603 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013604 /* parsing in context, i.e. as within existing content */
Daniel Veillardad88b542017-12-08 09:42:31 +010013605 ctxt->input_id = 2;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013606 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013607
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013608 fake = xmlNewComment(NULL);
13609 if (fake == NULL) {
13610 xmlFreeParserCtxt(ctxt);
13611 return(XML_ERR_NO_MEMORY);
13612 }
13613 xmlAddChild(node, fake);
13614
Daniel Veillard29b17482004-08-16 00:39:03 +000013615 if (node->type == XML_ELEMENT_NODE) {
13616 nodePush(ctxt, node);
13617 /*
13618 * initialize the SAX2 namespaces stack
13619 */
13620 cur = node;
13621 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13622 xmlNsPtr ns = cur->nsDef;
13623 const xmlChar *iprefix, *ihref;
13624
13625 while (ns != NULL) {
13626 if (ctxt->dict) {
13627 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13628 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13629 } else {
13630 iprefix = ns->prefix;
13631 ihref = ns->href;
13632 }
13633
13634 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13635 nsPush(ctxt, iprefix, ihref);
13636 nsnr++;
13637 }
13638 ns = ns->next;
13639 }
13640 cur = cur->parent;
13641 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013642 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013643
13644 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13645 /*
13646 * ID/IDREF registration will be done in xmlValidateElement below
13647 */
13648 ctxt->loadsubset |= XML_SKIP_IDS;
13649 }
13650
Daniel Veillard499cc922006-01-18 17:22:35 +000013651#ifdef LIBXML_HTML_ENABLED
13652 if (doc->type == XML_HTML_DOCUMENT_NODE)
13653 __htmlParseContent(ctxt);
13654 else
13655#endif
13656 xmlParseContent(ctxt);
13657
Daniel Veillard29b17482004-08-16 00:39:03 +000013658 nsPop(ctxt, nsnr);
13659 if ((RAW == '<') && (NXT(1) == '/')) {
13660 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13661 } else if (RAW != 0) {
13662 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13663 }
13664 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13665 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13666 ctxt->wellFormed = 0;
13667 }
13668
13669 if (!ctxt->wellFormed) {
13670 if (ctxt->errNo == 0)
13671 ret = XML_ERR_INTERNAL_ERROR;
13672 else
13673 ret = (xmlParserErrors)ctxt->errNo;
13674 } else {
13675 ret = XML_ERR_OK;
13676 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013677
Daniel Veillard29b17482004-08-16 00:39:03 +000013678 /*
13679 * Return the newly created nodeset after unlinking it from
13680 * the pseudo sibling.
13681 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013682
Daniel Veillard29b17482004-08-16 00:39:03 +000013683 cur = fake->next;
13684 fake->next = NULL;
13685 node->last = fake;
13686
13687 if (cur != NULL) {
13688 cur->prev = NULL;
13689 }
13690
13691 *lst = cur;
13692
13693 while (cur != NULL) {
13694 cur->parent = NULL;
13695 cur = cur->next;
13696 }
13697
13698 xmlUnlinkNode(fake);
13699 xmlFreeNode(fake);
13700
13701
13702 if (ret != XML_ERR_OK) {
13703 xmlFreeNodeList(*lst);
13704 *lst = NULL;
13705 }
William M. Brackc3f81342004-10-03 01:22:44 +000013706
William M. Brackb7b54de2004-10-06 16:38:01 +000013707 if (doc->dict != NULL)
13708 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013709 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013710
Daniel Veillard29b17482004-08-16 00:39:03 +000013711 return(ret);
13712#else /* !SAX2 */
13713 return(XML_ERR_INTERNAL_ERROR);
13714#endif
13715}
13716
Daniel Veillard81273902003-09-30 00:43:48 +000013717#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013718/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013719 * xmlParseBalancedChunkMemoryRecover:
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013720 * @doc: the document the chunk pertains to (must not be NULL)
13721 * @sax: the SAX handler block (possibly NULL)
Daniel Veillard58e44c92002-08-02 22:19:49 +000013722 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13723 * @depth: Used for loop detection, use 0
13724 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13725 * @lst: the return value for the set of parsed nodes
13726 * @recover: return nodes even if the data is broken (use 0)
13727 *
13728 *
13729 * Parse a well-balanced chunk of an XML document
13730 * called by the parser
13731 * The allowed sequence for the Well Balanced Chunk is the one defined by
13732 * the content production in the XML grammar:
13733 *
13734 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13735 *
13736 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13737 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013738 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013739 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013740 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13741 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013742 */
13743int
13744xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013745 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013746 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013747 xmlParserCtxtPtr ctxt;
13748 xmlDocPtr newDoc;
13749 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013750 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013751 int size;
13752 int ret = 0;
13753
Daniel Veillard0161e632008-08-28 15:36:32 +000013754 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013755 return(XML_ERR_ENTITY_LOOP);
13756 }
13757
13758
Daniel Veillardcda96922001-08-21 10:56:31 +000013759 if (lst != NULL)
13760 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013761 if (string == NULL)
13762 return(-1);
13763
13764 size = xmlStrlen(string);
13765
13766 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13767 if (ctxt == NULL) return(-1);
13768 ctxt->userData = ctxt;
13769 if (sax != NULL) {
13770 oldsax = ctxt->sax;
13771 ctxt->sax = sax;
13772 if (user_data != NULL)
13773 ctxt->userData = user_data;
13774 }
13775 newDoc = xmlNewDoc(BAD_CAST "1.0");
13776 if (newDoc == NULL) {
13777 xmlFreeParserCtxt(ctxt);
13778 return(-1);
13779 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013780 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013781 if ((doc != NULL) && (doc->dict != NULL)) {
13782 xmlDictFree(ctxt->dict);
13783 ctxt->dict = doc->dict;
13784 xmlDictReference(ctxt->dict);
13785 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13786 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13787 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13788 ctxt->dictNames = 1;
13789 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013790 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013791 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013792 /* doc == NULL is only supported for historic reasons */
Owen Taylor3473f882001-02-23 17:55:21 +000013793 if (doc != NULL) {
13794 newDoc->intSubset = doc->intSubset;
13795 newDoc->extSubset = doc->extSubset;
13796 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013797 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13798 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013799 if (sax != NULL)
13800 ctxt->sax = oldsax;
13801 xmlFreeParserCtxt(ctxt);
13802 newDoc->intSubset = NULL;
13803 newDoc->extSubset = NULL;
13804 xmlFreeDoc(newDoc);
13805 return(-1);
13806 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013807 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13808 nodePush(ctxt, newRoot);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013809 /* doc == NULL is only supported for historic reasons */
Owen Taylor3473f882001-02-23 17:55:21 +000013810 if (doc == NULL) {
13811 ctxt->myDoc = newDoc;
13812 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013813 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013814 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013815 /* Ensure that doc has XML spec namespace */
13816 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13817 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013818 }
13819 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardad88b542017-12-08 09:42:31 +010013820 ctxt->input_id = 2;
Owen Taylor3473f882001-02-23 17:55:21 +000013821 ctxt->depth = depth;
13822
13823 /*
13824 * Doing validity checking on chunk doesn't make sense
13825 */
13826 ctxt->validate = 0;
13827 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013828 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013829
Daniel Veillardb39bc392002-10-26 19:29:51 +000013830 if ( doc != NULL ){
13831 content = doc->children;
13832 doc->children = NULL;
13833 xmlParseContent(ctxt);
13834 doc->children = content;
13835 }
13836 else {
13837 xmlParseContent(ctxt);
13838 }
Owen Taylor3473f882001-02-23 17:55:21 +000013839 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013840 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013841 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013842 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013843 }
13844 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013845 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013846 }
13847
13848 if (!ctxt->wellFormed) {
13849 if (ctxt->errNo == 0)
13850 ret = 1;
13851 else
13852 ret = ctxt->errNo;
13853 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013854 ret = 0;
13855 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013856
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013857 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13858 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013859
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013860 /*
13861 * Return the newly created nodeset after unlinking it from
13862 * they pseudo parent.
13863 */
13864 cur = newDoc->children->children;
13865 *lst = cur;
13866 while (cur != NULL) {
13867 xmlSetTreeDoc(cur, doc);
13868 cur->parent = NULL;
13869 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013870 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013871 newDoc->children->children = NULL;
13872 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013873
13874 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013875 ctxt->sax = oldsax;
13876 xmlFreeParserCtxt(ctxt);
13877 newDoc->intSubset = NULL;
13878 newDoc->extSubset = NULL;
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013879 /* This leaks the namespace list if doc == NULL */
Rob Richardsa02f1992006-09-16 14:04:26 +000013880 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013881 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013882
Owen Taylor3473f882001-02-23 17:55:21 +000013883 return(ret);
13884}
13885
13886/**
13887 * xmlSAXParseEntity:
13888 * @sax: the SAX handler block
13889 * @filename: the filename
13890 *
13891 * parse an XML external entity out of context and build a tree.
13892 * It use the given SAX function block to handle the parsing callback.
13893 * If sax is NULL, fallback to the default DOM tree building routines.
13894 *
13895 * [78] extParsedEnt ::= TextDecl? content
13896 *
13897 * This correspond to a "Well Balanced" chunk
13898 *
13899 * Returns the resulting document tree
13900 */
13901
13902xmlDocPtr
13903xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13904 xmlDocPtr ret;
13905 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013906
13907 ctxt = xmlCreateFileParserCtxt(filename);
13908 if (ctxt == NULL) {
13909 return(NULL);
13910 }
13911 if (sax != NULL) {
13912 if (ctxt->sax != NULL)
13913 xmlFree(ctxt->sax);
13914 ctxt->sax = sax;
13915 ctxt->userData = NULL;
13916 }
13917
Owen Taylor3473f882001-02-23 17:55:21 +000013918 xmlParseExtParsedEnt(ctxt);
13919
13920 if (ctxt->wellFormed)
13921 ret = ctxt->myDoc;
13922 else {
13923 ret = NULL;
13924 xmlFreeDoc(ctxt->myDoc);
13925 ctxt->myDoc = NULL;
13926 }
13927 if (sax != NULL)
13928 ctxt->sax = NULL;
13929 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013930
Owen Taylor3473f882001-02-23 17:55:21 +000013931 return(ret);
13932}
13933
13934/**
13935 * xmlParseEntity:
13936 * @filename: the filename
13937 *
13938 * parse an XML external entity out of context and build a tree.
13939 *
13940 * [78] extParsedEnt ::= TextDecl? content
13941 *
13942 * This correspond to a "Well Balanced" chunk
13943 *
13944 * Returns the resulting document tree
13945 */
13946
13947xmlDocPtr
13948xmlParseEntity(const char *filename) {
13949 return(xmlSAXParseEntity(NULL, filename));
13950}
Daniel Veillard81273902003-09-30 00:43:48 +000013951#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013952
13953/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013954 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013955 * @URL: the entity URL
13956 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013957 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013958 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013959 *
13960 * Create a parser context for an external entity
13961 * Automatic support for ZLIB/Compress compressed document is provided
13962 * by default if found at compile-time.
13963 *
13964 * Returns the new parser context or NULL
13965 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013966static xmlParserCtxtPtr
13967xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13968 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013969 xmlParserCtxtPtr ctxt;
13970 xmlParserInputPtr inputStream;
13971 char *directory = NULL;
13972 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013973
Owen Taylor3473f882001-02-23 17:55:21 +000013974 ctxt = xmlNewParserCtxt();
13975 if (ctxt == NULL) {
13976 return(NULL);
13977 }
13978
Daniel Veillard48247b42009-07-10 16:12:46 +020013979 if (pctx != NULL) {
13980 ctxt->options = pctx->options;
13981 ctxt->_private = pctx->_private;
Daniel Veillardad88b542017-12-08 09:42:31 +010013982 /*
13983 * this is a subparser of pctx, so the input_id should be
13984 * incremented to distinguish from main entity
13985 */
13986 ctxt->input_id = pctx->input_id + 1;
Rob Richards9c0aa472009-03-26 18:10:19 +000013987 }
13988
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013989 /* Don't read from stdin. */
13990 if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13991 URL = BAD_CAST "./-";
13992
Owen Taylor3473f882001-02-23 17:55:21 +000013993 uri = xmlBuildURI(URL, base);
13994
13995 if (uri == NULL) {
13996 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13997 if (inputStream == NULL) {
13998 xmlFreeParserCtxt(ctxt);
13999 return(NULL);
14000 }
14001
14002 inputPush(ctxt, inputStream);
14003
14004 if ((ctxt->directory == NULL) && (directory == NULL))
14005 directory = xmlParserGetDirectory((char *)URL);
14006 if ((ctxt->directory == NULL) && (directory != NULL))
14007 ctxt->directory = directory;
14008 } else {
14009 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14010 if (inputStream == NULL) {
14011 xmlFree(uri);
14012 xmlFreeParserCtxt(ctxt);
14013 return(NULL);
14014 }
14015
14016 inputPush(ctxt, inputStream);
14017
14018 if ((ctxt->directory == NULL) && (directory == NULL))
14019 directory = xmlParserGetDirectory((char *)uri);
14020 if ((ctxt->directory == NULL) && (directory != NULL))
14021 ctxt->directory = directory;
14022 xmlFree(uri);
14023 }
Owen Taylor3473f882001-02-23 17:55:21 +000014024 return(ctxt);
14025}
14026
Rob Richards9c0aa472009-03-26 18:10:19 +000014027/**
14028 * xmlCreateEntityParserCtxt:
14029 * @URL: the entity URL
14030 * @ID: the entity PUBLIC ID
14031 * @base: a possible base for the target URI
14032 *
14033 * Create a parser context for an external entity
14034 * Automatic support for ZLIB/Compress compressed document is provided
14035 * by default if found at compile-time.
14036 *
14037 * Returns the new parser context or NULL
14038 */
14039xmlParserCtxtPtr
14040xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14041 const xmlChar *base) {
14042 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14043
14044}
14045
/************************************************************************
 *                                                                      *
 *              Front ends when parsing from a file                     *
 *                                                                      *
 ************************************************************************/
14051
14052/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014053 * xmlCreateURLParserCtxt:
14054 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014055 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014056 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014057 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014058 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014059 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014060 *
14061 * Returns the new parser context or NULL
14062 */
14063xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014064xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014065{
14066 xmlParserCtxtPtr ctxt;
14067 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014068 char *directory = NULL;
14069
Owen Taylor3473f882001-02-23 17:55:21 +000014070 ctxt = xmlNewParserCtxt();
14071 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014072 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014073 return(NULL);
14074 }
14075
Daniel Veillarddf292f72005-01-16 19:00:15 +000014076 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014077 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014078 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014079
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014080 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014081 if (inputStream == NULL) {
14082 xmlFreeParserCtxt(ctxt);
14083 return(NULL);
14084 }
14085
Owen Taylor3473f882001-02-23 17:55:21 +000014086 inputPush(ctxt, inputStream);
14087 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014088 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014089 if ((ctxt->directory == NULL) && (directory != NULL))
14090 ctxt->directory = directory;
14091
14092 return(ctxt);
14093}
14094
Daniel Veillard61b93382003-11-03 14:28:31 +000014095/**
14096 * xmlCreateFileParserCtxt:
14097 * @filename: the filename
14098 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014099 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014100 * Automatic support for ZLIB/Compress compressed document is provided
14101 * by default if found at compile-time.
14102 *
14103 * Returns the new parser context or NULL
14104 */
14105xmlParserCtxtPtr
14106xmlCreateFileParserCtxt(const char *filename)
14107{
14108 return(xmlCreateURLParserCtxt(filename, 0));
14109}
14110
Daniel Veillard81273902003-09-30 00:43:48 +000014111#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014112/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014113 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014114 * @sax: the SAX handler block
14115 * @filename: the filename
14116 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14117 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014118 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014119 *
14120 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14121 * compressed document is provided by default if found at compile-time.
14122 * It use the given SAX function block to handle the parsing callback.
14123 * If sax is NULL, fallback to the default DOM tree building routines.
14124 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014125 * User data (void *) is stored within the parser context in the
14126 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014127 *
Owen Taylor3473f882001-02-23 17:55:21 +000014128 * Returns the resulting document tree
14129 */
14130
14131xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014132xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14133 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014134 xmlDocPtr ret;
14135 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014136
Daniel Veillard635ef722001-10-29 11:48:19 +000014137 xmlInitParser();
14138
Owen Taylor3473f882001-02-23 17:55:21 +000014139 ctxt = xmlCreateFileParserCtxt(filename);
14140 if (ctxt == NULL) {
14141 return(NULL);
14142 }
14143 if (sax != NULL) {
14144 if (ctxt->sax != NULL)
14145 xmlFree(ctxt->sax);
14146 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014147 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014148 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014149 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014150 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014151 }
Owen Taylor3473f882001-02-23 17:55:21 +000014152
Daniel Veillard37d2d162008-03-14 10:54:00 +000014153 if (ctxt->directory == NULL)
14154 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014155
Daniel Veillarddad3f682002-11-17 16:47:27 +000014156 ctxt->recovery = recovery;
14157
Owen Taylor3473f882001-02-23 17:55:21 +000014158 xmlParseDocument(ctxt);
14159
William M. Brackc07329e2003-09-08 01:57:30 +000014160 if ((ctxt->wellFormed) || recovery) {
14161 ret = ctxt->myDoc;
Haibo Huangd23e46c2020-10-28 22:26:09 -070014162 if ((ret != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014163 if (ctxt->input->buf->compressed > 0)
14164 ret->compression = 9;
14165 else
14166 ret->compression = ctxt->input->buf->compressed;
14167 }
William M. Brackc07329e2003-09-08 01:57:30 +000014168 }
Owen Taylor3473f882001-02-23 17:55:21 +000014169 else {
14170 ret = NULL;
14171 xmlFreeDoc(ctxt->myDoc);
14172 ctxt->myDoc = NULL;
14173 }
14174 if (sax != NULL)
14175 ctxt->sax = NULL;
14176 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014177
Owen Taylor3473f882001-02-23 17:55:21 +000014178 return(ret);
14179}
14180
14181/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014182 * xmlSAXParseFile:
14183 * @sax: the SAX handler block
14184 * @filename: the filename
14185 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14186 * documents
14187 *
14188 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14189 * compressed document is provided by default if found at compile-time.
14190 * It use the given SAX function block to handle the parsing callback.
14191 * If sax is NULL, fallback to the default DOM tree building routines.
14192 *
14193 * Returns the resulting document tree
14194 */
14195
14196xmlDocPtr
14197xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14198 int recovery) {
14199 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14200}
14201
14202/**
Owen Taylor3473f882001-02-23 17:55:21 +000014203 * xmlRecoverDoc:
14204 * @cur: a pointer to an array of xmlChar
14205 *
14206 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014207 * In the case the document is not Well Formed, a attempt to build a
14208 * tree is tried anyway
14209 *
14210 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014211 */
14212
14213xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014214xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014215 return(xmlSAXParseDoc(NULL, cur, 1));
14216}
14217
14218/**
14219 * xmlParseFile:
14220 * @filename: the filename
14221 *
14222 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14223 * compressed document is provided by default if found at compile-time.
14224 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014225 * Returns the resulting document tree if the file was wellformed,
14226 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014227 */
14228
14229xmlDocPtr
14230xmlParseFile(const char *filename) {
14231 return(xmlSAXParseFile(NULL, filename, 0));
14232}
14233
14234/**
14235 * xmlRecoverFile:
14236 * @filename: the filename
14237 *
14238 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14239 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014240 * In the case the document is not Well Formed, it attempts to build
14241 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014242 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014243 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014244 */
14245
14246xmlDocPtr
14247xmlRecoverFile(const char *filename) {
14248 return(xmlSAXParseFile(NULL, filename, 1));
14249}
14250
14251
14252/**
14253 * xmlSetupParserForBuffer:
14254 * @ctxt: an XML parser context
14255 * @buffer: a xmlChar * buffer
14256 * @filename: a file name
14257 *
14258 * Setup the parser context to parse a new buffer; Clears any prior
14259 * contents from the parser context. The buffer parameter must not be
14260 * NULL, but the filename parameter can be
14261 */
14262void
14263xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14264 const char* filename)
14265{
14266 xmlParserInputPtr input;
14267
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014268 if ((ctxt == NULL) || (buffer == NULL))
14269 return;
14270
Owen Taylor3473f882001-02-23 17:55:21 +000014271 input = xmlNewInputStream(ctxt);
14272 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014273 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014274 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014275 return;
14276 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014277
Owen Taylor3473f882001-02-23 17:55:21 +000014278 xmlClearParserCtxt(ctxt);
14279 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014280 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014281 input->base = buffer;
14282 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014283 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014284 inputPush(ctxt, input);
14285}
14286
14287/**
14288 * xmlSAXUserParseFile:
14289 * @sax: a SAX handler
14290 * @user_data: The user data returned on SAX callbacks
14291 * @filename: a file name
14292 *
14293 * parse an XML file and call the given SAX handler routines.
14294 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014295 *
Owen Taylor3473f882001-02-23 17:55:21 +000014296 * Returns 0 in case of success or a error number otherwise
14297 */
14298int
14299xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14300 const char *filename) {
14301 int ret = 0;
14302 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014303
Owen Taylor3473f882001-02-23 17:55:21 +000014304 ctxt = xmlCreateFileParserCtxt(filename);
14305 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014306 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014307 xmlFree(ctxt->sax);
14308 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014309 xmlDetectSAX2(ctxt);
14310
Owen Taylor3473f882001-02-23 17:55:21 +000014311 if (user_data != NULL)
14312 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014313
Owen Taylor3473f882001-02-23 17:55:21 +000014314 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014315
Owen Taylor3473f882001-02-23 17:55:21 +000014316 if (ctxt->wellFormed)
14317 ret = 0;
14318 else {
14319 if (ctxt->errNo != 0)
14320 ret = ctxt->errNo;
14321 else
14322 ret = -1;
14323 }
14324 if (sax != NULL)
14325 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014326 if (ctxt->myDoc != NULL) {
14327 xmlFreeDoc(ctxt->myDoc);
14328 ctxt->myDoc = NULL;
14329 }
Owen Taylor3473f882001-02-23 17:55:21 +000014330 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014331
Owen Taylor3473f882001-02-23 17:55:21 +000014332 return ret;
14333}
Daniel Veillard81273902003-09-30 00:43:48 +000014334#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014335
/************************************************************************
 *                                                                      *
 *              Front ends when parsing from memory                     *
 *                                                                      *
 ************************************************************************/
14341
14342/**
14343 * xmlCreateMemoryParserCtxt:
14344 * @buffer: a pointer to a char array
14345 * @size: the size of the array
14346 *
14347 * Create a parser context for an XML in-memory document.
14348 *
14349 * Returns the new parser context or NULL
14350 */
14351xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014352xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014353 xmlParserCtxtPtr ctxt;
14354 xmlParserInputPtr input;
14355 xmlParserInputBufferPtr buf;
14356
14357 if (buffer == NULL)
14358 return(NULL);
14359 if (size <= 0)
14360 return(NULL);
14361
14362 ctxt = xmlNewParserCtxt();
14363 if (ctxt == NULL)
14364 return(NULL);
14365
Daniel Veillard53350552003-09-18 13:35:51 +000014366 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014367 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014368 if (buf == NULL) {
14369 xmlFreeParserCtxt(ctxt);
14370 return(NULL);
14371 }
Owen Taylor3473f882001-02-23 17:55:21 +000014372
14373 input = xmlNewInputStream(ctxt);
14374 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014375 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014376 xmlFreeParserCtxt(ctxt);
14377 return(NULL);
14378 }
14379
14380 input->filename = NULL;
14381 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014382 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014383
14384 inputPush(ctxt, input);
14385 return(ctxt);
14386}
14387
Daniel Veillard81273902003-09-30 00:43:48 +000014388#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014389/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014390 * xmlSAXParseMemoryWithData:
14391 * @sax: the SAX handler block
14392 * @buffer: an pointer to a char array
14393 * @size: the size of the array
14394 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14395 * documents
14396 * @data: the userdata
14397 *
14398 * parse an XML in-memory block and use the given SAX function block
14399 * to handle the parsing callback. If sax is NULL, fallback to the default
14400 * DOM tree building routines.
14401 *
14402 * User data (void *) is stored within the parser context in the
14403 * context's _private member, so it is available nearly everywhere in libxml
14404 *
14405 * Returns the resulting document tree
14406 */
14407
14408xmlDocPtr
14409xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14410 int size, int recovery, void *data) {
14411 xmlDocPtr ret;
14412 xmlParserCtxtPtr ctxt;
14413
Daniel Veillardab2a7632009-07-09 08:45:03 +020014414 xmlInitParser();
14415
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014416 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14417 if (ctxt == NULL) return(NULL);
14418 if (sax != NULL) {
14419 if (ctxt->sax != NULL)
14420 xmlFree(ctxt->sax);
14421 ctxt->sax = sax;
14422 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014423 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014424 if (data!=NULL) {
14425 ctxt->_private=data;
14426 }
14427
Daniel Veillardadba5f12003-04-04 16:09:01 +000014428 ctxt->recovery = recovery;
14429
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014430 xmlParseDocument(ctxt);
14431
14432 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433 else {
14434 ret = NULL;
14435 xmlFreeDoc(ctxt->myDoc);
14436 ctxt->myDoc = NULL;
14437 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014438 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014439 ctxt->sax = NULL;
14440 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014441
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014442 return(ret);
14443}
14444
14445/**
Owen Taylor3473f882001-02-23 17:55:21 +000014446 * xmlSAXParseMemory:
14447 * @sax: the SAX handler block
14448 * @buffer: an pointer to a char array
14449 * @size: the size of the array
14450 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14451 * documents
14452 *
14453 * parse an XML in-memory block and use the given SAX function block
14454 * to handle the parsing callback. If sax is NULL, fallback to the default
14455 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014456 *
Owen Taylor3473f882001-02-23 17:55:21 +000014457 * Returns the resulting document tree
14458 */
14459xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014460xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14461 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014462 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014463}
14464
14465/**
14466 * xmlParseMemory:
14467 * @buffer: an pointer to a char array
14468 * @size: the size of the array
14469 *
14470 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014471 *
Owen Taylor3473f882001-02-23 17:55:21 +000014472 * Returns the resulting document tree
14473 */
14474
Daniel Veillard50822cb2001-07-26 20:05:51 +000014475xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014476 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14477}
14478
14479/**
14480 * xmlRecoverMemory:
14481 * @buffer: an pointer to a char array
14482 * @size: the size of the array
14483 *
14484 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014485 * In the case the document is not Well Formed, an attempt to
14486 * build a tree is tried anyway
14487 *
14488 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014489 */
14490
Daniel Veillard50822cb2001-07-26 20:05:51 +000014491xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014492 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14493}
14494
14495/**
14496 * xmlSAXUserParseMemory:
14497 * @sax: a SAX handler
14498 * @user_data: The user data returned on SAX callbacks
14499 * @buffer: an in-memory XML document input
14500 * @size: the length of the XML document in bytes
14501 *
14502 * A better SAX parsing routine.
14503 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014504 *
Owen Taylor3473f882001-02-23 17:55:21 +000014505 * Returns 0 in case of success or a error number otherwise
14506 */
14507int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014508 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014509 int ret = 0;
14510 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014511
14512 xmlInitParser();
14513
Owen Taylor3473f882001-02-23 17:55:21 +000014514 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14515 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014516 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14517 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014518 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014519 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014520
Daniel Veillard30211a02001-04-26 09:33:18 +000014521 if (user_data != NULL)
14522 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014523
Owen Taylor3473f882001-02-23 17:55:21 +000014524 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014525
Owen Taylor3473f882001-02-23 17:55:21 +000014526 if (ctxt->wellFormed)
14527 ret = 0;
14528 else {
14529 if (ctxt->errNo != 0)
14530 ret = ctxt->errNo;
14531 else
14532 ret = -1;
14533 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014534 if (sax != NULL)
14535 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014536 if (ctxt->myDoc != NULL) {
14537 xmlFreeDoc(ctxt->myDoc);
14538 ctxt->myDoc = NULL;
14539 }
Owen Taylor3473f882001-02-23 17:55:21 +000014540 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014541
Owen Taylor3473f882001-02-23 17:55:21 +000014542 return ret;
14543}
Daniel Veillard81273902003-09-30 00:43:48 +000014544#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014545
14546/**
14547 * xmlCreateDocParserCtxt:
14548 * @cur: a pointer to an array of xmlChar
14549 *
14550 * Creates a parser context for an XML in-memory document.
14551 *
14552 * Returns the new parser context or NULL
14553 */
14554xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014555xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014556 int len;
14557
14558 if (cur == NULL)
14559 return(NULL);
14560 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014561 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014562}
14563
Daniel Veillard81273902003-09-30 00:43:48 +000014564#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014565/**
14566 * xmlSAXParseDoc:
14567 * @sax: the SAX handler block
14568 * @cur: a pointer to an array of xmlChar
14569 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14570 * documents
14571 *
14572 * parse an XML in-memory document and build a tree.
14573 * It use the given SAX function block to handle the parsing callback.
14574 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014575 *
Owen Taylor3473f882001-02-23 17:55:21 +000014576 * Returns the resulting document tree
14577 */
14578
14579xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014580xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014581 xmlDocPtr ret;
14582 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014583 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014584
Daniel Veillard38936062004-11-04 17:45:11 +000014585 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014586
14587
14588 ctxt = xmlCreateDocParserCtxt(cur);
14589 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014590 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014591 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014592 ctxt->sax = sax;
14593 ctxt->userData = NULL;
14594 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014595 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014596
14597 xmlParseDocument(ctxt);
14598 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14599 else {
14600 ret = NULL;
14601 xmlFreeDoc(ctxt->myDoc);
14602 ctxt->myDoc = NULL;
14603 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014604 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014605 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014606 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014607
Owen Taylor3473f882001-02-23 17:55:21 +000014608 return(ret);
14609}
14610
14611/**
14612 * xmlParseDoc:
14613 * @cur: a pointer to an array of xmlChar
14614 *
14615 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014616 *
Owen Taylor3473f882001-02-23 17:55:21 +000014617 * Returns the resulting document tree
14618 */
14619
14620xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014621xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014622 return(xmlSAXParseDoc(NULL, cur, 0));
14623}
Daniel Veillard81273902003-09-30 00:43:48 +000014624#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014625
Daniel Veillard81273902003-09-30 00:43:48 +000014626#ifdef LIBXML_LEGACY_ENABLED
/************************************************************************
 *                                                                      *
 *      Specific function to keep track of entities references         *
 *      and used by the XSLT debugger                                   *
 *                                                                      *
 ************************************************************************/
14633
14634static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14635
14636/**
14637 * xmlAddEntityReference:
14638 * @ent : A valid entity
14639 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014640 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014641 *
14642 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14643 */
14644static void
14645xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14646 xmlNodePtr lastNode)
14647{
14648 if (xmlEntityRefFunc != NULL) {
14649 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14650 }
14651}
14652
14653
14654/**
14655 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014656 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014657 *
14658 * Set the function to call call back when a xml reference has been made
14659 */
14660void
14661xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14662{
14663 xmlEntityRefFunc = func;
14664}
Daniel Veillard81273902003-09-30 00:43:48 +000014665#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014666
/************************************************************************
 *                                                                      *
 *                              Miscellaneous                           *
 *                                                                      *
 ************************************************************************/
14672
14673#ifdef LIBXML_XPATH_ENABLED
14674#include <libxml/xpath.h>
14675#endif
14676
Daniel Veillardffa3c742005-07-21 13:24:09 +000014677extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014678static int xmlParserInitialized = 0;
14679
14680/**
14681 * xmlInitParser:
14682 *
14683 * Initialization function for the XML parser.
14684 * This is not reentrant. Call once before processing in case of
14685 * use in multithreaded programs.
14686 */
14687
14688void
14689xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014690 if (xmlParserInitialized != 0)
14691 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014692
Haibo Huangcfd91dc2020-07-30 23:01:33 -070014693#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
Elliott Hughes60f5c162021-08-20 17:09:52 -070014694 if (xmlFree == free)
14695 atexit(xmlCleanupParser);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070014696#endif
14697
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014698#ifdef LIBXML_THREAD_ENABLED
14699 __xmlGlobalInitMutexLock();
14700 if (xmlParserInitialized == 0) {
14701#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014702 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014703 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014704 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14705 (xmlGenericError == NULL))
14706 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014707 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014708 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014709 xmlInitCharEncodingHandlers();
14710 xmlDefaultSAXHandlerInit();
14711 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014712#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014713 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014714#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014715#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014716 htmlInitAutoClose();
14717 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014718#endif
14719#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014720 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014721#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014722 xmlParserInitialized = 1;
14723#ifdef LIBXML_THREAD_ENABLED
14724 }
14725 __xmlGlobalInitMutexUnlock();
14726#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014727}
14728
14729/**
14730 * xmlCleanupParser:
14731 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014732 * This function name is somewhat misleading. It does not clean up
14733 * parser state, it cleans up memory allocated by the library itself.
14734 * It is a cleanup function for the XML library. It tries to reclaim all
14735 * related global memory allocated for the library processing.
14736 * It doesn't deallocate any document related memory. One should
14737 * call xmlCleanupParser() only when the process has finished using
14738 * the library and all XML/HTML documents built with it.
14739 * See also xmlInitParser() which has the opposite function of preparing
14740 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014741 *
14742 * WARNING: if your application is multithreaded or has plugin support
14743 * calling this may crash the application if another thread or
14744 * a plugin is still using libxml2. It's sometimes very hard to
14745 * guess if libxml2 is in use in the application, some libraries
14746 * or plugins may use it without notice. In case of doubt abstain
14747 * from calling this function or do it just before calling exit()
14748 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014749 */
14750
14751void
14752xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014753 if (!xmlParserInitialized)
14754 return;
14755
Owen Taylor3473f882001-02-23 17:55:21 +000014756 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014757#ifdef LIBXML_CATALOG_ENABLED
14758 xmlCatalogCleanup();
14759#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014760 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014761 xmlCleanupInputCallbacks();
14762#ifdef LIBXML_OUTPUT_ENABLED
14763 xmlCleanupOutputCallbacks();
14764#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014765#ifdef LIBXML_SCHEMAS_ENABLED
14766 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014767 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014768#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014769 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014770 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014771 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014772 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014773 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014774}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014775
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Destructor hook for non-static, non-Windows builds: runs
 * xmlCleanupParser(), but only while the default deallocator is still
 * installed.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri, so bail out when xmlFree
     * has been replaced.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14789
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014790/************************************************************************
14791 * *
14792 * New set (2.6.0) of simpler and more flexible APIs *
14793 * *
14794 ************************************************************************/
14795
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible at the point
 * of use; when it is NULL every non-NULL string is freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used safely inside unbraced
 * if/else constructs. Invoke it with a trailing semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14807
14808/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014809 * xmlCtxtReset:
14810 * @ctxt: an XML parser context
14811 *
14812 * Reset a parser context
14813 */
14814void
14815xmlCtxtReset(xmlParserCtxtPtr ctxt)
14816{
14817 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014818 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014819
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014820 if (ctxt == NULL)
14821 return;
14822
14823 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014824
14825 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14826 xmlFreeInputStream(input);
14827 }
14828 ctxt->inputNr = 0;
14829 ctxt->input = NULL;
14830
14831 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014832 if (ctxt->spaceTab != NULL) {
14833 ctxt->spaceTab[0] = -1;
14834 ctxt->space = &ctxt->spaceTab[0];
14835 } else {
14836 ctxt->space = NULL;
14837 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014838
14839
14840 ctxt->nodeNr = 0;
14841 ctxt->node = NULL;
14842
14843 ctxt->nameNr = 0;
14844 ctxt->name = NULL;
14845
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014846 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014847 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014848 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014849 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014850 DICT_FREE(ctxt->directory);
14851 ctxt->directory = NULL;
14852 DICT_FREE(ctxt->extSubURI);
14853 ctxt->extSubURI = NULL;
14854 DICT_FREE(ctxt->extSubSystem);
14855 ctxt->extSubSystem = NULL;
14856 if (ctxt->myDoc != NULL)
14857 xmlFreeDoc(ctxt->myDoc);
14858 ctxt->myDoc = NULL;
14859
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014860 ctxt->standalone = -1;
14861 ctxt->hasExternalSubset = 0;
14862 ctxt->hasPErefs = 0;
14863 ctxt->html = 0;
14864 ctxt->external = 0;
14865 ctxt->instate = XML_PARSER_START;
14866 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014867
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014868 ctxt->wellFormed = 1;
14869 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014870 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014871 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014872#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014873 ctxt->vctxt.userData = ctxt;
14874 ctxt->vctxt.error = xmlParserValidityError;
14875 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014876#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014877 ctxt->record_info = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014878 ctxt->checkIndex = 0;
14879 ctxt->inSubset = 0;
14880 ctxt->errNo = XML_ERR_OK;
14881 ctxt->depth = 0;
14882 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14883 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014884 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014885 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014886 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014887 xmlInitNodeInfoSeq(&ctxt->node_seq);
14888
14889 if (ctxt->attsDefault != NULL) {
Nick Wellnhofere03f0a12017-11-09 16:42:47 +010014890 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014891 ctxt->attsDefault = NULL;
14892 }
14893 if (ctxt->attsSpecial != NULL) {
14894 xmlHashFree(ctxt->attsSpecial, NULL);
14895 ctxt->attsSpecial = NULL;
14896 }
14897
Daniel Veillard4432df22003-09-28 18:58:27 +000014898#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014899 if (ctxt->catalogs != NULL)
14900 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014901#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014902 if (ctxt->lastError.code != XML_ERR_OK)
14903 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014904}
14905
14906/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014907 * xmlCtxtResetPush:
14908 * @ctxt: an XML parser context
14909 * @chunk: a pointer to an array of chars
14910 * @size: number of chars in the array
14911 * @filename: an optional file name or URI
14912 * @encoding: the document encoding, or NULL
14913 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014914 * Reset a push parser context
14915 *
14916 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014917 */
14918int
14919xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14920 int size, const char *filename, const char *encoding)
14921{
14922 xmlParserInputPtr inputStream;
14923 xmlParserInputBufferPtr buf;
14924 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14925
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014926 if (ctxt == NULL)
14927 return(1);
14928
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014929 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14930 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14931
14932 buf = xmlAllocParserInputBuffer(enc);
14933 if (buf == NULL)
14934 return(1);
14935
14936 if (ctxt == NULL) {
14937 xmlFreeParserInputBuffer(buf);
14938 return(1);
14939 }
14940
14941 xmlCtxtReset(ctxt);
14942
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014943 if (filename == NULL) {
14944 ctxt->directory = NULL;
14945 } else {
14946 ctxt->directory = xmlParserGetDirectory(filename);
14947 }
14948
14949 inputStream = xmlNewInputStream(ctxt);
14950 if (inputStream == NULL) {
14951 xmlFreeParserInputBuffer(buf);
14952 return(1);
14953 }
14954
14955 if (filename == NULL)
14956 inputStream->filename = NULL;
14957 else
14958 inputStream->filename = (char *)
14959 xmlCanonicPath((const xmlChar *) filename);
14960 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014961 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014962
14963 inputPush(ctxt, inputStream);
14964
14965 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14966 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014967 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14968 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014969
14970 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14971
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014972 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014973#ifdef DEBUG_PUSH
14974 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14975#endif
14976 }
14977
14978 if (encoding != NULL) {
14979 xmlCharEncodingHandlerPtr hdlr;
14980
Daniel Veillard37334572008-07-31 08:20:02 +000014981 if (ctxt->encoding != NULL)
14982 xmlFree((xmlChar *) ctxt->encoding);
14983 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14984
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014985 hdlr = xmlFindCharEncodingHandler(encoding);
14986 if (hdlr != NULL) {
14987 xmlSwitchToEncoding(ctxt, hdlr);
14988 } else {
14989 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14990 "Unsupported encoding %s\n", BAD_CAST encoding);
14991 }
14992 } else if (enc != XML_CHAR_ENCODING_NONE) {
14993 xmlSwitchEncoding(ctxt, enc);
14994 }
14995
14996 return(0);
14997}
14998
Daniel Veillard37334572008-07-31 08:20:02 +000014999
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015000/**
Daniel Veillard37334572008-07-31 08:20:02 +000015001 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015002 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015003 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015004 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015005 *
15006 * Applies the options to the parser context
15007 *
15008 * Returns 0 in case of success, the set of unknown or unimplemented options
15009 * in case of error.
15010 */
Daniel Veillard37334572008-07-31 08:20:02 +000015011static int
15012xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015013{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015014 if (ctxt == NULL)
15015 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015016 if (encoding != NULL) {
15017 if (ctxt->encoding != NULL)
15018 xmlFree((xmlChar *) ctxt->encoding);
15019 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15020 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015021 if (options & XML_PARSE_RECOVER) {
15022 ctxt->recovery = 1;
15023 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015024 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015025 } else
15026 ctxt->recovery = 0;
15027 if (options & XML_PARSE_DTDLOAD) {
15028 ctxt->loadsubset = XML_DETECT_IDS;
15029 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015030 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015031 } else
15032 ctxt->loadsubset = 0;
15033 if (options & XML_PARSE_DTDATTR) {
15034 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15035 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015036 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015037 }
15038 if (options & XML_PARSE_NOENT) {
15039 ctxt->replaceEntities = 1;
15040 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15041 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015042 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015043 } else
15044 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015045 if (options & XML_PARSE_PEDANTIC) {
15046 ctxt->pedantic = 1;
15047 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015048 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015049 } else
15050 ctxt->pedantic = 0;
15051 if (options & XML_PARSE_NOBLANKS) {
15052 ctxt->keepBlanks = 0;
15053 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15054 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015055 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015056 } else
15057 ctxt->keepBlanks = 1;
15058 if (options & XML_PARSE_DTDVALID) {
15059 ctxt->validate = 1;
15060 if (options & XML_PARSE_NOWARNING)
15061 ctxt->vctxt.warning = NULL;
15062 if (options & XML_PARSE_NOERROR)
15063 ctxt->vctxt.error = NULL;
15064 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015065 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015066 } else
15067 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015068 if (options & XML_PARSE_NOWARNING) {
15069 ctxt->sax->warning = NULL;
15070 options -= XML_PARSE_NOWARNING;
15071 }
15072 if (options & XML_PARSE_NOERROR) {
15073 ctxt->sax->error = NULL;
15074 ctxt->sax->fatalError = NULL;
15075 options -= XML_PARSE_NOERROR;
15076 }
Daniel Veillard81273902003-09-30 00:43:48 +000015077#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015078 if (options & XML_PARSE_SAX1) {
15079 ctxt->sax->startElement = xmlSAX2StartElement;
15080 ctxt->sax->endElement = xmlSAX2EndElement;
15081 ctxt->sax->startElementNs = NULL;
15082 ctxt->sax->endElementNs = NULL;
15083 ctxt->sax->initialized = 1;
15084 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015085 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015086 }
Daniel Veillard81273902003-09-30 00:43:48 +000015087#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015088 if (options & XML_PARSE_NODICT) {
15089 ctxt->dictNames = 0;
15090 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015091 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015092 } else {
15093 ctxt->dictNames = 1;
15094 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015095 if (options & XML_PARSE_NOCDATA) {
15096 ctxt->sax->cdataBlock = NULL;
15097 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015098 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015099 }
15100 if (options & XML_PARSE_NSCLEAN) {
15101 ctxt->options |= XML_PARSE_NSCLEAN;
15102 options -= XML_PARSE_NSCLEAN;
15103 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015104 if (options & XML_PARSE_NONET) {
15105 ctxt->options |= XML_PARSE_NONET;
15106 options -= XML_PARSE_NONET;
15107 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015108 if (options & XML_PARSE_COMPACT) {
15109 ctxt->options |= XML_PARSE_COMPACT;
15110 options -= XML_PARSE_COMPACT;
15111 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015112 if (options & XML_PARSE_OLD10) {
15113 ctxt->options |= XML_PARSE_OLD10;
15114 options -= XML_PARSE_OLD10;
15115 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015116 if (options & XML_PARSE_NOBASEFIX) {
15117 ctxt->options |= XML_PARSE_NOBASEFIX;
15118 options -= XML_PARSE_NOBASEFIX;
15119 }
15120 if (options & XML_PARSE_HUGE) {
15121 ctxt->options |= XML_PARSE_HUGE;
15122 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015123 if (ctxt->dict != NULL)
15124 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015125 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015126 if (options & XML_PARSE_OLDSAX) {
15127 ctxt->options |= XML_PARSE_OLDSAX;
15128 options -= XML_PARSE_OLDSAX;
15129 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015130 if (options & XML_PARSE_IGNORE_ENC) {
15131 ctxt->options |= XML_PARSE_IGNORE_ENC;
15132 options -= XML_PARSE_IGNORE_ENC;
15133 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015134 if (options & XML_PARSE_BIG_LINES) {
15135 ctxt->options |= XML_PARSE_BIG_LINES;
15136 options -= XML_PARSE_BIG_LINES;
15137 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015138 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015139 return (options);
15140}
15141
15142/**
Daniel Veillard37334572008-07-31 08:20:02 +000015143 * xmlCtxtUseOptions:
15144 * @ctxt: an XML parser context
15145 * @options: a combination of xmlParserOption
15146 *
15147 * Applies the options to the parser context
15148 *
15149 * Returns 0 in case of success, the set of unknown or unimplemented options
15150 * in case of error.
15151 */
15152int
15153xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15154{
15155 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15156}
15157
15158/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015159 * xmlDoRead:
15160 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015161 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015162 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015163 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015164 * @reuse: keep the context for reuse
15165 *
15166 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015167 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015168 * Returns the resulting document tree or NULL
15169 */
15170static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015171xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15172 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015173{
15174 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015175
15176 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015177 if (encoding != NULL) {
15178 xmlCharEncodingHandlerPtr hdlr;
15179
15180 hdlr = xmlFindCharEncodingHandler(encoding);
15181 if (hdlr != NULL)
15182 xmlSwitchToEncoding(ctxt, hdlr);
15183 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015184 if ((URL != NULL) && (ctxt->input != NULL) &&
15185 (ctxt->input->filename == NULL))
15186 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015187 xmlParseDocument(ctxt);
15188 if ((ctxt->wellFormed) || ctxt->recovery)
15189 ret = ctxt->myDoc;
15190 else {
15191 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015192 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015193 xmlFreeDoc(ctxt->myDoc);
15194 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015195 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015196 ctxt->myDoc = NULL;
15197 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015198 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015199 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015200
15201 return (ret);
15202}
15203
15204/**
15205 * xmlReadDoc:
15206 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015207 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015208 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015209 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015210 *
15211 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015212 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015213 * Returns the resulting document tree
15214 */
15215xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015216xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015217{
15218 xmlParserCtxtPtr ctxt;
15219
15220 if (cur == NULL)
15221 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015222 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015223
15224 ctxt = xmlCreateDocParserCtxt(cur);
15225 if (ctxt == NULL)
15226 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015227 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015228}
15229
15230/**
15231 * xmlReadFile:
15232 * @filename: a file or URL
15233 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015234 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015235 *
15236 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015237 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015238 * Returns the resulting document tree
15239 */
15240xmlDocPtr
15241xmlReadFile(const char *filename, const char *encoding, int options)
15242{
15243 xmlParserCtxtPtr ctxt;
15244
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015245 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015246 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015247 if (ctxt == NULL)
15248 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015249 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015250}
15251
15252/**
15253 * xmlReadMemory:
15254 * @buffer: a pointer to a char array
15255 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015256 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015257 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015258 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015259 *
15260 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015261 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015262 * Returns the resulting document tree
15263 */
15264xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015265xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015266{
15267 xmlParserCtxtPtr ctxt;
15268
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015269 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015270 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15271 if (ctxt == NULL)
15272 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015273 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015274}
15275
15276/**
15277 * xmlReadFd:
15278 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015279 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015280 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015281 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015282 *
15283 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015284 * NOTE that the file descriptor will not be closed when the
15285 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015286 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015287 * Returns the resulting document tree
15288 */
15289xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015290xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015291{
15292 xmlParserCtxtPtr ctxt;
15293 xmlParserInputBufferPtr input;
15294 xmlParserInputPtr stream;
15295
15296 if (fd < 0)
15297 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015298 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015299
15300 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15301 if (input == NULL)
15302 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015303 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015304 ctxt = xmlNewParserCtxt();
15305 if (ctxt == NULL) {
15306 xmlFreeParserInputBuffer(input);
15307 return (NULL);
15308 }
15309 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15310 if (stream == NULL) {
15311 xmlFreeParserInputBuffer(input);
15312 xmlFreeParserCtxt(ctxt);
15313 return (NULL);
15314 }
15315 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015316 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015317}
15318
15319/**
15320 * xmlReadIO:
15321 * @ioread: an I/O read function
15322 * @ioclose: an I/O close function
15323 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015324 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015325 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015326 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015327 *
15328 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015329 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015330 * Returns the resulting document tree
15331 */
15332xmlDocPtr
15333xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015334 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015335{
15336 xmlParserCtxtPtr ctxt;
15337 xmlParserInputBufferPtr input;
15338 xmlParserInputPtr stream;
15339
15340 if (ioread == NULL)
15341 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015342 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015343
15344 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15345 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015346 if (input == NULL) {
15347 if (ioclose != NULL)
15348 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015349 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015350 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015351 ctxt = xmlNewParserCtxt();
15352 if (ctxt == NULL) {
15353 xmlFreeParserInputBuffer(input);
15354 return (NULL);
15355 }
15356 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15357 if (stream == NULL) {
15358 xmlFreeParserInputBuffer(input);
15359 xmlFreeParserCtxt(ctxt);
15360 return (NULL);
15361 }
15362 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015363 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015364}
15365
15366/**
15367 * xmlCtxtReadDoc:
15368 * @ctxt: an XML parser context
15369 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015370 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015371 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015372 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015373 *
15374 * parse an XML in-memory document and build a tree.
15375 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015376 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015377 * Returns the resulting document tree
15378 */
15379xmlDocPtr
15380xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015381 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015382{
15383 xmlParserInputPtr stream;
15384
15385 if (cur == NULL)
15386 return (NULL);
15387 if (ctxt == NULL)
15388 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015389 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015390
15391 xmlCtxtReset(ctxt);
15392
15393 stream = xmlNewStringInputStream(ctxt, cur);
15394 if (stream == NULL) {
15395 return (NULL);
15396 }
15397 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015398 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015399}
15400
15401/**
15402 * xmlCtxtReadFile:
15403 * @ctxt: an XML parser context
15404 * @filename: a file or URL
15405 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015406 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015407 *
15408 * parse an XML file from the filesystem or the network.
15409 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015410 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015411 * Returns the resulting document tree
15412 */
15413xmlDocPtr
15414xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15415 const char *encoding, int options)
15416{
15417 xmlParserInputPtr stream;
15418
15419 if (filename == NULL)
15420 return (NULL);
15421 if (ctxt == NULL)
15422 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015423 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015424
15425 xmlCtxtReset(ctxt);
15426
Daniel Veillard29614c72004-11-26 10:47:26 +000015427 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015428 if (stream == NULL) {
15429 return (NULL);
15430 }
15431 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015432 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015433}
15434
15435/**
15436 * xmlCtxtReadMemory:
15437 * @ctxt: an XML parser context
15438 * @buffer: a pointer to a char array
15439 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015440 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015441 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015442 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015443 *
15444 * parse an XML in-memory document and build a tree.
15445 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015446 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015447 * Returns the resulting document tree
15448 */
15449xmlDocPtr
15450xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015451 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015452{
15453 xmlParserInputBufferPtr input;
15454 xmlParserInputPtr stream;
15455
15456 if (ctxt == NULL)
15457 return (NULL);
15458 if (buffer == NULL)
15459 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015460 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015461
15462 xmlCtxtReset(ctxt);
15463
15464 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15465 if (input == NULL) {
15466 return(NULL);
15467 }
15468
15469 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15470 if (stream == NULL) {
15471 xmlFreeParserInputBuffer(input);
15472 return(NULL);
15473 }
15474
15475 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015476 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015477}
15478
15479/**
15480 * xmlCtxtReadFd:
15481 * @ctxt: an XML parser context
15482 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015483 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015484 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015485 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015486 *
15487 * parse an XML from a file descriptor and build a tree.
15488 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015489 * NOTE that the file descriptor will not be closed when the
15490 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015491 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015492 * Returns the resulting document tree
15493 */
15494xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015495xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15496 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015497{
15498 xmlParserInputBufferPtr input;
15499 xmlParserInputPtr stream;
15500
15501 if (fd < 0)
15502 return (NULL);
15503 if (ctxt == NULL)
15504 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015505 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015506
15507 xmlCtxtReset(ctxt);
15508
15509
15510 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15511 if (input == NULL)
15512 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015513 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015514 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15515 if (stream == NULL) {
15516 xmlFreeParserInputBuffer(input);
15517 return (NULL);
15518 }
15519 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015520 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015521}
15522
15523/**
15524 * xmlCtxtReadIO:
15525 * @ctxt: an XML parser context
15526 * @ioread: an I/O read function
15527 * @ioclose: an I/O close function
15528 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015529 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015530 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015531 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015532 *
15533 * parse an XML document from I/O functions and source and build a tree.
15534 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015535 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015536 * Returns the resulting document tree
15537 */
15538xmlDocPtr
15539xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15540 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015541 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015542 const char *encoding, int options)
15543{
15544 xmlParserInputBufferPtr input;
15545 xmlParserInputPtr stream;
15546
15547 if (ioread == NULL)
15548 return (NULL);
15549 if (ctxt == NULL)
15550 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015551 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015552
15553 xmlCtxtReset(ctxt);
15554
15555 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15556 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015557 if (input == NULL) {
15558 if (ioclose != NULL)
15559 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015560 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015561 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015562 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15563 if (stream == NULL) {
15564 xmlFreeParserInputBuffer(input);
15565 return (NULL);
15566 }
15567 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015568 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015569}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015570