blob: c9312fa48d9c2803ae7e9e079e3da8247ecf6a93 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Stéphane Michaut454e3972017-08-28 14:30:43 +020033/* To avoid EBCDIC trouble when parsing on zOS */
34#if defined(__MVS__)
35#pragma convert("ISO8859-1")
36#endif
37
Daniel Veillard34ce8be2002-03-18 19:37:11 +000038#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000039#include "libxml.h"
40
Nick Wellnhofere3890542017-10-09 00:20:01 +020041#if defined(_WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000042#define XML_DIR_SEP '\\'
43#else
Owen Taylor3473f882001-02-23 17:55:21 +000044#define XML_DIR_SEP '/'
45#endif
46
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080048#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000050#include <stdarg.h>
Nick Wellnhofer855c19e2017-06-01 01:04:08 +020051#include <stddef.h>
Owen Taylor3473f882001-02-23 17:55:21 +000052#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000053#include <libxml/threads.h>
54#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055#include <libxml/tree.h>
56#include <libxml/parser.h>
57#include <libxml/parserInternals.h>
58#include <libxml/valid.h>
59#include <libxml/entities.h>
60#include <libxml/xmlerror.h>
61#include <libxml/encoding.h>
62#include <libxml/xmlIO.h>
63#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000064#ifdef LIBXML_CATALOG_ENABLED
65#include <libxml/catalog.h>
66#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000067#ifdef LIBXML_SCHEMAS_ENABLED
68#include <libxml/xmlschemastypes.h>
69#include <libxml/relaxng.h>
70#endif
Owen Taylor3473f882001-02-23 17:55:21 +000071#ifdef HAVE_CTYPE_H
72#include <ctype.h>
73#endif
74#ifdef HAVE_STDLIB_H
75#include <stdlib.h>
76#endif
77#ifdef HAVE_SYS_STAT_H
78#include <sys/stat.h>
79#endif
80#ifdef HAVE_FCNTL_H
81#include <fcntl.h>
82#endif
83#ifdef HAVE_UNISTD_H
84#include <unistd.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Elliott Hughese54f00d2021-05-13 08:13:46 -070090struct _xmlStartTag {
91 const xmlChar *prefix;
92 const xmlChar *URI;
93 int line;
94 int nsNr;
95};
96
Daniel Veillard0161e632008-08-28 15:36:32 +000097static void
98xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
99
Rob Richards9c0aa472009-03-26 18:10:19 +0000100static xmlParserCtxtPtr
101xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
102 const xmlChar *base, xmlParserCtxtPtr pctx);
103
Daniel Veillard28cd9cb2015-11-20 14:55:30 +0800104static void xmlHaltParser(xmlParserCtxtPtr ctxt);
105
Haibo Huangcfd91dc2020-07-30 23:01:33 -0700106static int
107xmlParseElementStart(xmlParserCtxtPtr ctxt);
108
109static void
110xmlParseElementEnd(xmlParserCtxtPtr ctxt);
111
Daniel Veillard0161e632008-08-28 15:36:32 +0000112/************************************************************************
113 * *
114 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
115 * *
116 ************************************************************************/
117
/* Size thresholds used by the entity expansion checks. */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
128
129/*
130 * xmlParserEntityCheck
131 *
132 * Function to check non-linear entity expansion behaviour
133 * This is here to detect and stop exponential linear entity expansion
134 * This is not a limitation of the parser but a safety
135 * boundary feature. It can be disabled with the XML_PARSE_HUGE
136 * parser option.
137 */
138static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800139xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800140 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000141{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800142 size_t consumed = 0;
Elliott Hughese54f00d2021-05-13 08:13:46 -0700143 int i;
Daniel Veillard0161e632008-08-28 15:36:32 +0000144
145 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
146 return (0);
147 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
148 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800149
150 /*
151 * This may look absurd but is needed to detect
152 * entities problems
153 */
154 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillardbdd66182016-05-23 12:27:58 +0800155 (ent->content != NULL) && (ent->checked == 0) &&
156 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -0700157 unsigned long oldnbent = ctxt->nbentities, diff;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800158 xmlChar *rep;
159
160 ent->checked = 1;
161
Peter Simons8f30bdf2016-04-15 11:56:55 +0200162 ++ctxt->depth;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800163 rep = xmlStringDecodeEntities(ctxt, ent->content,
164 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +0200165 --ctxt->depth;
Nick Wellnhofer707ad082018-01-23 16:37:54 +0100166 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbdd66182016-05-23 12:27:58 +0800167 ent->content[0] = 0;
168 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800169
Haibo Huangcfd91dc2020-07-30 23:01:33 -0700170 diff = ctxt->nbentities - oldnbent + 1;
171 if (diff > INT_MAX / 2)
172 diff = INT_MAX / 2;
173 ent->checked = diff * 2;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800174 if (rep != NULL) {
175 if (xmlStrchr(rep, '<'))
176 ent->checked |= 1;
177 xmlFree(rep);
178 rep = NULL;
179 }
180 }
Elliott Hughese54f00d2021-05-13 08:13:46 -0700181
182 /*
183 * Prevent entity exponential check, not just replacement while
184 * parsing the DTD
185 * The check is potentially costly so do that only once in a thousand
186 */
187 if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
188 (ctxt->nbentities % 1024 == 0)) {
189 for (i = 0;i < ctxt->inputNr;i++) {
190 consumed += ctxt->inputTab[i]->consumed +
191 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
192 }
193 if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
194 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
195 ctxt->instate = XML_PARSER_EOF;
196 return (1);
197 }
198 consumed = 0;
199 }
200
201
202
Daniel Veillard23f05e02013-02-19 10:21:49 +0800203 if (replacement != 0) {
204 if (replacement < XML_MAX_TEXT_LENGTH)
205 return(0);
206
207 /*
208 * If the volume of entity copy reaches 10 times the
209 * amount of parsed data and over the large text threshold
210 * then that's very likely to be an abuse.
211 */
212 if (ctxt->input != NULL) {
213 consumed = ctxt->input->consumed +
214 (ctxt->input->cur - ctxt->input->base);
215 }
216 consumed += ctxt->sizeentities;
217
218 if (replacement < XML_PARSER_NON_LINEAR * consumed)
219 return(0);
220 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000221 /*
222 * Do the check based on the replacement size of the entity
223 */
224 if (size < XML_PARSER_BIG_ENTITY)
225 return(0);
226
227 /*
228 * A limit on the amount of text data reasonably used
229 */
230 if (ctxt->input != NULL) {
231 consumed = ctxt->input->consumed +
232 (ctxt->input->cur - ctxt->input->base);
233 }
234 consumed += ctxt->sizeentities;
235
236 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
237 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
238 return (0);
239 } else if (ent != NULL) {
240 /*
241 * use the number of parsed entities in the replacement
242 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800243 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000244
245 /*
246 * The amount of data parsed counting entities size only once
247 */
248 if (ctxt->input != NULL) {
249 consumed = ctxt->input->consumed +
250 (ctxt->input->cur - ctxt->input->base);
251 }
252 consumed += ctxt->sizeentities;
253
254 /*
255 * Check the density of entities for the amount of data
256 * knowing an entity reference will take at least 3 bytes
257 */
258 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
259 return (0);
260 } else {
261 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800262 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000263 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800264 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
265 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
266 (ctxt->nbentities <= 10000))
267 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000268 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000269 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
270 return (1);
271}
272
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000282
Daniel Veillard0fb18932003-09-07 09:14:37 +0000283
Daniel Veillard0161e632008-08-28 15:36:32 +0000284
#define SAX2 1

/* Buffer sizes used by the parser. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW this is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
300
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is NULL terminated. Both the strings and the array slots are
 * const so the table cannot be altered at runtime.
 */
static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
310
Daniel Veillarda07050d2003-10-19 14:46:32 +0000311
Owen Taylor3473f882001-02-23 17:55:21 +0000312/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200313static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
314 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000315
Daniel Veillard7d515752003-09-26 19:12:37 +0000316static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000317xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
318 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000319 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000320 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000321
Daniel Veillard37334572008-07-31 08:20:02 +0000322static int
323xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
324 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000325#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000326static void
327xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
328 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000329#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000330
Daniel Veillard7d515752003-09-26 19:12:37 +0000331static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000332xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
333 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000334
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000335static int
336xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
337
Daniel Veillarde57ec792003-09-10 10:50:59 +0000338/************************************************************************
339 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800340 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000341 * *
342 ************************************************************************/
343
344/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 * xmlErrAttributeDup:
346 * @ctxt: an XML parser context
347 * @prefix: the attribute prefix
348 * @localname: the attribute localname
349 *
350 * Handle a redefinition of attribute error
351 */
352static void
353xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
354 const xmlChar * localname)
355{
Daniel Veillard157fee02003-10-31 10:36:03 +0000356 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
357 (ctxt->instate == XML_PARSER_EOF))
358 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000359 if (ctxt != NULL)
360 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200361
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000362 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200364 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 (const char *) localname, NULL, NULL, 0, 0,
366 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000367 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000368 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200369 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370 (const char *) prefix, (const char *) localname,
371 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
372 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000373 if (ctxt != NULL) {
374 ctxt->wellFormed = 0;
375 if (ctxt->recovery == 0)
376 ctxt->disableSAX = 1;
377 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378}
379
380/**
381 * xmlFatalErr:
382 * @ctxt: an XML parser context
383 * @error: the error number
384 * @extra: extra information string
385 *
386 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
387 */
388static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000389xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390{
391 const char *errmsg;
392
Daniel Veillard157fee02003-10-31 10:36:03 +0000393 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
394 (ctxt->instate == XML_PARSER_EOF))
395 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000396 switch (error) {
397 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800404 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000407 errmsg = "internal error";
408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800428 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800431 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800434 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800437 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800440 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800443 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800446 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800449 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000451 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800452 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800455 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000457 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800458 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000460 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800461 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000462 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000463 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800464 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000466 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800467 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000468 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000469 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800470 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000471 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000472 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800473 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000474 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000475 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000476 errmsg = "Fragment not allowed";
477 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000478 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800479 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000480 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000481 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800482 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000483 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000484 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800485 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000486 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000487 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800488 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000489 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000490 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800491 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000492 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000493 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800494 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000495 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000496 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800497 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000498 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000499 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000500 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800501 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000502 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000503 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800504 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000505 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000506 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800507 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000508 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000509 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800510 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000511 break;
512 case XML_ERR_CONDSEC_INVALID_KEYWORD:
513 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800514 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000515 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000516 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800517 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000518 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000519 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800520 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000521 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000522 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800523 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000525 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800526 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000527 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000528 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800529 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000530 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000531 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800532 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000533 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000534 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800535 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000536 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000537 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800538 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000540 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800541 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000542 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000543 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800544 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000545 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000546 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800547 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000548 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000549 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800550 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000551 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000552 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800553 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000554 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000555 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800556 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000557 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000558 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800559 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000560 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000561 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800562 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000563 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000564 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800565 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000566 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800567 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800568 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800569 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000570#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000571 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800572 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000573 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000574#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000575 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800576 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000577 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000578 if (ctxt != NULL)
579 ctxt->errNo = error;
David Kilzer4472c3a2016-05-13 15:13:17 +0800580 if (info == NULL) {
581 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
582 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
583 errmsg);
584 } else {
585 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
586 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
587 errmsg, info);
588 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000589 if (ctxt != NULL) {
590 ctxt->wellFormed = 0;
591 if (ctxt->recovery == 0)
592 ctxt->disableSAX = 1;
593 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000594}
595
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596/**
597 * xmlFatalErrMsg:
598 * @ctxt: an XML parser context
599 * @error: the error number
600 * @msg: the error message
601 *
602 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
603 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800604static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000605xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
606 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000607{
Daniel Veillard157fee02003-10-31 10:36:03 +0000608 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
609 (ctxt->instate == XML_PARSER_EOF))
610 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000611 if (ctxt != NULL)
612 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000613 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200614 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000615 if (ctxt != NULL) {
616 ctxt->wellFormed = 0;
617 if (ctxt->recovery == 0)
618 ctxt->disableSAX = 1;
619 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000620}
621
622/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000623 * xmlWarningMsg:
624 * @ctxt: an XML parser context
625 * @error: the error number
626 * @msg: the error message
627 * @str1: extra data
628 * @str2: extra data
629 *
630 * Handle a warning.
631 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800632static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000633xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
634 const char *msg, const xmlChar *str1, const xmlChar *str2)
635{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000636 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000637
Daniel Veillard157fee02003-10-31 10:36:03 +0000638 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
639 (ctxt->instate == XML_PARSER_EOF))
640 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000641 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
642 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000643 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200644 if (ctxt != NULL) {
645 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000646 (ctxt->sax) ? ctxt->sax->warning : NULL,
647 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000648 ctxt, NULL, XML_FROM_PARSER, error,
649 XML_ERR_WARNING, NULL, 0,
650 (const char *) str1, (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200652 } else {
653 __xmlRaiseError(schannel, NULL, NULL,
654 ctxt, NULL, XML_FROM_PARSER, error,
655 XML_ERR_WARNING, NULL, 0,
656 (const char *) str1, (const char *) str2, NULL, 0, 0,
657 msg, (const char *) str1, (const char *) str2);
658 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000659}
660
661/**
662 * xmlValidityError:
663 * @ctxt: an XML parser context
664 * @error: the error number
665 * @msg: the error message
666 * @str1: extra data
667 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000668 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000669 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800670static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000671xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000672 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000673{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000674 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000675
676 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
677 (ctxt->instate == XML_PARSER_EOF))
678 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000679 if (ctxt != NULL) {
680 ctxt->errNo = error;
681 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
682 schannel = ctxt->sax->serror;
683 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200684 if (ctxt != NULL) {
685 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000686 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000687 ctxt, NULL, XML_FROM_DTD, error,
688 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000689 (const char *) str2, NULL, 0, 0,
690 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000691 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200692 } else {
693 __xmlRaiseError(schannel, NULL, NULL,
694 ctxt, NULL, XML_FROM_DTD, error,
695 XML_ERR_ERROR, NULL, 0, (const char *) str1,
696 (const char *) str2, NULL, 0, 0,
697 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000698 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000699}
700
701/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000702 * xmlFatalErrMsgInt:
703 * @ctxt: an XML parser context
704 * @error: the error number
705 * @msg: the error message
706 * @val: an integer value
707 *
708 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
709 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800710static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000711xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000712 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000713{
Daniel Veillard157fee02003-10-31 10:36:03 +0000714 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
715 (ctxt->instate == XML_PARSER_EOF))
716 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000717 if (ctxt != NULL)
718 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000719 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000720 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
721 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000722 if (ctxt != NULL) {
723 ctxt->wellFormed = 0;
724 if (ctxt->recovery == 0)
725 ctxt->disableSAX = 1;
726 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000727}
728
729/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000730 * xmlFatalErrMsgStrIntStr:
731 * @ctxt: an XML parser context
732 * @error: the error number
733 * @msg: the error message
734 * @str1: an string info
735 * @val: an integer value
736 * @str2: an string info
737 *
738 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
739 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800740static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000741xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800742 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000743 const xmlChar *str2)
744{
Daniel Veillard157fee02003-10-31 10:36:03 +0000745 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
746 (ctxt->instate == XML_PARSER_EOF))
747 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000748 if (ctxt != NULL)
749 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000750 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000751 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
752 NULL, 0, (const char *) str1, (const char *) str2,
753 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000754 if (ctxt != NULL) {
755 ctxt->wellFormed = 0;
756 if (ctxt->recovery == 0)
757 ctxt->disableSAX = 1;
758 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000759}
760
761/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000762 * xmlFatalErrMsgStr:
763 * @ctxt: an XML parser context
764 * @error: the error number
765 * @msg: the error message
766 * @val: a string value
767 *
768 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
769 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800770static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000771xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000772 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000773{
Daniel Veillard157fee02003-10-31 10:36:03 +0000774 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
775 (ctxt->instate == XML_PARSER_EOF))
776 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000777 if (ctxt != NULL)
778 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000779 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000780 XML_FROM_PARSER, error, XML_ERR_FATAL,
781 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
782 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000783 if (ctxt != NULL) {
784 ctxt->wellFormed = 0;
785 if (ctxt->recovery == 0)
786 ctxt->disableSAX = 1;
787 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000788}
789
790/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000791 * xmlErrMsgStr:
792 * @ctxt: an XML parser context
793 * @error: the error number
794 * @msg: the error message
795 * @val: a string value
796 *
797 * Handle a non fatal parser error
798 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800799static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000800xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
801 const char *msg, const xmlChar * val)
802{
Daniel Veillard157fee02003-10-31 10:36:03 +0000803 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
804 (ctxt->instate == XML_PARSER_EOF))
805 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000806 if (ctxt != NULL)
807 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000808 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000809 XML_FROM_PARSER, error, XML_ERR_ERROR,
810 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
811 val);
812}
813
814/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000815 * xmlNsErr:
816 * @ctxt: an XML parser context
817 * @error: the error number
818 * @msg: the message
819 * @info1: extra information string
820 * @info2: extra information string
821 *
822 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
823 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800824static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000825xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
826 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000827 const xmlChar * info1, const xmlChar * info2,
828 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000829{
Daniel Veillard157fee02003-10-31 10:36:03 +0000830 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
831 (ctxt->instate == XML_PARSER_EOF))
832 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000833 if (ctxt != NULL)
834 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000835 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000836 XML_ERR_ERROR, NULL, 0, (const char *) info1,
837 (const char *) info2, (const char *) info3, 0, 0, msg,
838 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000839 if (ctxt != NULL)
840 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000841}
842
Daniel Veillard37334572008-07-31 08:20:02 +0000843/**
844 * xmlNsWarn
845 * @ctxt: an XML parser context
846 * @error: the error number
847 * @msg: the message
848 * @info1: extra information string
849 * @info2: extra information string
850 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800851 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000852 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800853static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard37334572008-07-31 08:20:02 +0000854xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
855 const char *msg,
856 const xmlChar * info1, const xmlChar * info2,
857 const xmlChar * info3)
858{
859 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
860 (ctxt->instate == XML_PARSER_EOF))
861 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000862 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
863 XML_ERR_WARNING, NULL, 0, (const char *) info1,
864 (const char *) info2, (const char *) info3, 0, 0, msg,
865 info1, info2, info3);
866}
867
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000868/************************************************************************
869 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800870 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000871 * *
872 ************************************************************************/
873
874/**
875 * xmlHasFeature:
876 * @feature: the feature to be examined
877 *
878 * Examines if the library has been compiled with a given feature.
879 *
880 * Returns a non-zero value if the feature exist, otherwise zero.
881 * Returns zero (0) if the feature does not exist or an unknown
882 * unknown feature is requested, non-zero otherwise.
883 */
884int
885xmlHasFeature(xmlFeature feature)
886{
887 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000888 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000889#ifdef LIBXML_THREAD_ENABLED
890 return(1);
891#else
892 return(0);
893#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000894 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000895#ifdef LIBXML_TREE_ENABLED
896 return(1);
897#else
898 return(0);
899#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000900 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000901#ifdef LIBXML_OUTPUT_ENABLED
902 return(1);
903#else
904 return(0);
905#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000906 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000907#ifdef LIBXML_PUSH_ENABLED
908 return(1);
909#else
910 return(0);
911#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000912 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000913#ifdef LIBXML_READER_ENABLED
914 return(1);
915#else
916 return(0);
917#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000918 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000919#ifdef LIBXML_PATTERN_ENABLED
920 return(1);
921#else
922 return(0);
923#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000924 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000925#ifdef LIBXML_WRITER_ENABLED
926 return(1);
927#else
928 return(0);
929#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000930 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000931#ifdef LIBXML_SAX1_ENABLED
932 return(1);
933#else
934 return(0);
935#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000936 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000937#ifdef LIBXML_FTP_ENABLED
938 return(1);
939#else
940 return(0);
941#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000942 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000943#ifdef LIBXML_HTTP_ENABLED
944 return(1);
945#else
946 return(0);
947#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000948 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000949#ifdef LIBXML_VALID_ENABLED
950 return(1);
951#else
952 return(0);
953#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000954 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000955#ifdef LIBXML_HTML_ENABLED
956 return(1);
957#else
958 return(0);
959#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000960 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000961#ifdef LIBXML_LEGACY_ENABLED
962 return(1);
963#else
964 return(0);
965#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000966 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000967#ifdef LIBXML_C14N_ENABLED
968 return(1);
969#else
970 return(0);
971#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000972 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000973#ifdef LIBXML_CATALOG_ENABLED
974 return(1);
975#else
976 return(0);
977#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000978 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000979#ifdef LIBXML_XPATH_ENABLED
980 return(1);
981#else
982 return(0);
983#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000984 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000985#ifdef LIBXML_XPTR_ENABLED
986 return(1);
987#else
988 return(0);
989#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000990 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000991#ifdef LIBXML_XINCLUDE_ENABLED
992 return(1);
993#else
994 return(0);
995#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000996 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000997#ifdef LIBXML_ICONV_ENABLED
998 return(1);
999#else
1000 return(0);
1001#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001002 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001003#ifdef LIBXML_ISO8859X_ENABLED
1004 return(1);
1005#else
1006 return(0);
1007#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001008 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001009#ifdef LIBXML_UNICODE_ENABLED
1010 return(1);
1011#else
1012 return(0);
1013#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001014 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001015#ifdef LIBXML_REGEXP_ENABLED
1016 return(1);
1017#else
1018 return(0);
1019#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001020 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001021#ifdef LIBXML_AUTOMATA_ENABLED
1022 return(1);
1023#else
1024 return(0);
1025#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001026 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001027#ifdef LIBXML_EXPR_ENABLED
1028 return(1);
1029#else
1030 return(0);
1031#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001032 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001033#ifdef LIBXML_SCHEMAS_ENABLED
1034 return(1);
1035#else
1036 return(0);
1037#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001038 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001039#ifdef LIBXML_SCHEMATRON_ENABLED
1040 return(1);
1041#else
1042 return(0);
1043#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001044 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001045#ifdef LIBXML_MODULES_ENABLED
1046 return(1);
1047#else
1048 return(0);
1049#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001050 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001051#ifdef LIBXML_DEBUG_ENABLED
1052 return(1);
1053#else
1054 return(0);
1055#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001056 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001057#ifdef DEBUG_MEMORY_LOCATION
1058 return(1);
1059#else
1060 return(0);
1061#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001062 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001063#ifdef LIBXML_DEBUG_RUNTIME
1064 return(1);
1065#else
1066 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001067#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001068 case XML_WITH_ZLIB:
1069#ifdef LIBXML_ZLIB_ENABLED
1070 return(1);
1071#else
1072 return(0);
1073#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001074 case XML_WITH_LZMA:
1075#ifdef LIBXML_LZMA_ENABLED
1076 return(1);
1077#else
1078 return(0);
1079#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001080 case XML_WITH_ICU:
1081#ifdef LIBXML_ICU_ENABLED
1082 return(1);
1083#else
1084 return(0);
1085#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001086 default:
1087 break;
1088 }
1089 return(0);
1090}
1091
1092/************************************************************************
1093 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001094 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001095 * *
1096 ************************************************************************/
1097
1098/**
1099 * xmlDetectSAX2:
1100 * @ctxt: an XML parser context
1101 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001102 * Do the SAX2 detection and specific initialization
Daniel Veillarde57ec792003-09-10 10:50:59 +00001103 */
1104static void
1105xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
Haibo Huangf0a546b2020-09-01 20:28:19 -07001106 xmlSAXHandlerPtr sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001107 if (ctxt == NULL) return;
Haibo Huangf0a546b2020-09-01 20:28:19 -07001108 sax = ctxt->sax;
Daniel Veillard81273902003-09-30 00:43:48 +00001109#ifdef LIBXML_SAX1_ENABLED
Haibo Huangf0a546b2020-09-01 20:28:19 -07001110 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
1111 ((sax->startElementNs != NULL) ||
1112 (sax->endElementNs != NULL) ||
1113 ((sax->startElement == NULL) && (sax->endElement == NULL))))
1114 ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001115#else
1116 ctxt->sax2 = 1;
1117#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001118
1119 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1120 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1121 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001122 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1123 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001124 xmlErrMemory(ctxt, NULL);
1125 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001126}
1127
Daniel Veillarde57ec792003-09-10 10:50:59 +00001128typedef struct _xmlDefAttrs xmlDefAttrs;
1129typedef xmlDefAttrs *xmlDefAttrsPtr;
1130struct _xmlDefAttrs {
1131 int nbAttrs; /* number of defaulted attributes on that element */
1132 int maxAttrs; /* the size of the array */
Nick Wellnhofer47496722017-05-31 16:46:39 +02001133#if __STDC_VERSION__ >= 199901L
1134 /* Using a C99 flexible array member avoids UBSan errors. */
1135 const xmlChar *values[]; /* array of localname/prefix/values/external */
1136#else
1137 const xmlChar *values[5];
1138#endif
Daniel Veillarde57ec792003-09-10 10:50:59 +00001139};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001140
1141/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001142 * xmlAttrNormalizeSpace:
1143 * @src: the source string
1144 * @dst: the target string
1145 *
1146 * Normalize the space in non CDATA attribute values:
1147 * If the attribute type is not CDATA, then the XML processor MUST further
1148 * process the normalized attribute value by discarding any leading and
1149 * trailing space (#x20) characters, and by replacing sequences of space
1150 * (#x20) characters by a single space (#x20) character.
1151 * Note that the size of dst need to be at least src, and if one doesn't need
1152 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1153 * passing src as dst is just fine.
1154 *
1155 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1156 * is needed.
1157 */
1158static xmlChar *
1159xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1160{
1161 if ((src == NULL) || (dst == NULL))
1162 return(NULL);
1163
1164 while (*src == 0x20) src++;
1165 while (*src != 0) {
1166 if (*src == 0x20) {
1167 while (*src == 0x20) src++;
1168 if (*src != 0)
1169 *dst++ = 0x20;
1170 } else {
1171 *dst++ = *src++;
1172 }
1173 }
1174 *dst = 0;
1175 if (dst == src)
1176 return(NULL);
1177 return(dst);
1178}
1179
1180/**
1181 * xmlAttrNormalizeSpace2:
1182 * @src: the source string
1183 *
1184 * Normalize the space in non CDATA attribute values, a slightly more complex
1185 * front end to avoid allocation problems when running on attribute values
1186 * coming from the input.
1187 *
1188 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1189 * is needed.
1190 */
1191static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001192xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001193{
1194 int i;
1195 int remove_head = 0;
1196 int need_realloc = 0;
1197 const xmlChar *cur;
1198
1199 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1200 return(NULL);
1201 i = *len;
1202 if (i <= 0)
1203 return(NULL);
1204
1205 cur = src;
1206 while (*cur == 0x20) {
1207 cur++;
1208 remove_head++;
1209 }
1210 while (*cur != 0) {
1211 if (*cur == 0x20) {
1212 cur++;
1213 if ((*cur == 0x20) || (*cur == 0)) {
1214 need_realloc = 1;
1215 break;
1216 }
1217 } else
1218 cur++;
1219 }
1220 if (need_realloc) {
1221 xmlChar *ret;
1222
1223 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1224 if (ret == NULL) {
1225 xmlErrMemory(ctxt, NULL);
1226 return(NULL);
1227 }
1228 xmlAttrNormalizeSpace(ret, ret);
1229 *len = (int) strlen((const char *)ret);
1230 return(ret);
1231 } else if (remove_head) {
1232 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001233 memmove(src, src + remove_head, 1 + *len);
1234 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001235 }
1236 return(NULL);
1237}
1238
1239/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001240 * xmlAddDefAttrs:
1241 * @ctxt: an XML parser context
1242 * @fullname: the element fullname
1243 * @fullattr: the attribute fullname
1244 * @value: the attribute value
1245 *
1246 * Add a defaulted attribute for an element
1247 */
1248static void
1249xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1250 const xmlChar *fullname,
1251 const xmlChar *fullattr,
1252 const xmlChar *value) {
1253 xmlDefAttrsPtr defaults;
1254 int len;
1255 const xmlChar *name;
1256 const xmlChar *prefix;
1257
Daniel Veillard6a31b832008-03-26 14:06:44 +00001258 /*
1259 * Allows to detect attribute redefinitions
1260 */
1261 if (ctxt->attsSpecial != NULL) {
1262 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1263 return;
1264 }
1265
Daniel Veillarde57ec792003-09-10 10:50:59 +00001266 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001267 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001268 if (ctxt->attsDefault == NULL)
1269 goto mem_error;
1270 }
1271
1272 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001273 * split the element name into prefix:localname , the string found
1274 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001275 */
1276 name = xmlSplitQName3(fullname, &len);
1277 if (name == NULL) {
1278 name = xmlDictLookup(ctxt->dict, fullname, -1);
1279 prefix = NULL;
1280 } else {
1281 name = xmlDictLookup(ctxt->dict, name, -1);
1282 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1283 }
1284
1285 /*
1286 * make sure there is some storage
1287 */
1288 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1289 if (defaults == NULL) {
1290 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001291 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001292 if (defaults == NULL)
1293 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001294 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001295 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001296 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1297 defaults, NULL) < 0) {
1298 xmlFree(defaults);
1299 goto mem_error;
1300 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001301 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001302 xmlDefAttrsPtr temp;
1303
1304 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001305 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001306 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001307 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001308 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001309 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001310 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1311 defaults, NULL) < 0) {
1312 xmlFree(defaults);
1313 goto mem_error;
1314 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001315 }
1316
1317 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001318 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001319 * are within the DTD and hen not associated to namespace names.
1320 */
1321 name = xmlSplitQName3(fullattr, &len);
1322 if (name == NULL) {
1323 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1324 prefix = NULL;
1325 } else {
1326 name = xmlDictLookup(ctxt->dict, name, -1);
1327 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1328 }
1329
Daniel Veillardae0765b2008-07-31 19:54:59 +00001330 defaults->values[5 * defaults->nbAttrs] = name;
1331 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001332 /* intern the string and precompute the end */
1333 len = xmlStrlen(value);
1334 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001335 defaults->values[5 * defaults->nbAttrs + 2] = value;
1336 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1337 if (ctxt->external)
1338 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1339 else
1340 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001341 defaults->nbAttrs++;
1342
1343 return;
1344
1345mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001346 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001347 return;
1348}
1349
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001350/**
1351 * xmlAddSpecialAttr:
1352 * @ctxt: an XML parser context
1353 * @fullname: the element fullname
1354 * @fullattr: the attribute fullname
1355 * @type: the attribute type
1356 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001357 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001358 */
1359static void
1360xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1361 const xmlChar *fullname,
1362 const xmlChar *fullattr,
1363 int type)
1364{
1365 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001366 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001367 if (ctxt->attsSpecial == NULL)
1368 goto mem_error;
1369 }
1370
Daniel Veillardac4118d2008-01-11 05:27:32 +00001371 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1372 return;
1373
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001374 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
Nick Wellnhoferd422b952017-10-09 13:37:42 +02001375 (void *) (ptrdiff_t) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001376 return;
1377
1378mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001379 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001380 return;
1381}
1382
Daniel Veillard4432df22003-09-28 18:58:27 +00001383/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001384 * xmlCleanSpecialAttrCallback:
1385 *
1386 * Removes CDATA attributes from the special attribute table
1387 */
1388static void
1389xmlCleanSpecialAttrCallback(void *payload, void *data,
1390 const xmlChar *fullname, const xmlChar *fullattr,
1391 const xmlChar *unused ATTRIBUTE_UNUSED) {
1392 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1393
Nick Wellnhoferd422b952017-10-09 13:37:42 +02001394 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001395 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1396 }
1397}
1398
1399/**
1400 * xmlCleanSpecialAttr:
1401 * @ctxt: an XML parser context
1402 *
1403 * Trim the list of attributes defined to remove all those of type
1404 * CDATA as they are not special. This call should be done when finishing
1405 * to parse the DTD and before starting to parse the document root.
1406 */
1407static void
1408xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1409{
1410 if (ctxt->attsSpecial == NULL)
1411 return;
1412
1413 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1414
1415 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1416 xmlHashFree(ctxt->attsSpecial, NULL);
1417 ctxt->attsSpecial = NULL;
1418 }
1419 return;
1420}
1421
1422/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001423 * xmlCheckLanguageID:
1424 * @lang: pointer to the string value
1425 *
1426 * Checks that the value conforms to the LanguageID production:
1427 *
1428 * NOTE: this is somewhat deprecated, those productions were removed from
1429 * the XML Second edition.
1430 *
1431 * [33] LanguageID ::= Langcode ('-' Subcode)*
1432 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1433 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1434 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1435 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1436 * [38] Subcode ::= ([a-z] | [A-Z])+
1437 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001438 * The current REC reference the successors of RFC 1766, currently 5646
Daniel Veillard60587d62010-11-04 15:16:27 +01001439 *
1440 * http://www.rfc-editor.org/rfc/rfc5646.txt
1441 * langtag = language
1442 * ["-" script]
1443 * ["-" region]
1444 * *("-" variant)
1445 * *("-" extension)
1446 * ["-" privateuse]
1447 * language = 2*3ALPHA ; shortest ISO 639 code
1448 * ["-" extlang] ; sometimes followed by
1449 * ; extended language subtags
1450 * / 4ALPHA ; or reserved for future use
1451 * / 5*8ALPHA ; or registered language subtag
1452 *
1453 * extlang = 3ALPHA ; selected ISO 639 codes
1454 * *2("-" 3ALPHA) ; permanently reserved
1455 *
1456 * script = 4ALPHA ; ISO 15924 code
1457 *
1458 * region = 2ALPHA ; ISO 3166-1 code
1459 * / 3DIGIT ; UN M.49 code
1460 *
1461 * variant = 5*8alphanum ; registered variants
1462 * / (DIGIT 3alphanum)
1463 *
1464 * extension = singleton 1*("-" (2*8alphanum))
1465 *
1466 * ; Single alphanumerics
1467 * ; "x" reserved for private use
1468 * singleton = DIGIT ; 0 - 9
1469 * / %x41-57 ; A - W
1470 * / %x59-5A ; Y - Z
1471 * / %x61-77 ; a - w
1472 * / %x79-7A ; y - z
1473 *
1474 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1475 * The parser below doesn't try to cope with extension or privateuse
1476 * that could be added but that's not interoperable anyway
1477 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001478 * Returns 1 if correct 0 otherwise
1479 **/
1480int
1481xmlCheckLanguageID(const xmlChar * lang)
1482{
Daniel Veillard60587d62010-11-04 15:16:27 +01001483 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001484
1485 if (cur == NULL)
1486 return (0);
1487 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001488 ((cur[0] == 'I') && (cur[1] == '-')) ||
1489 ((cur[0] == 'x') && (cur[1] == '-')) ||
1490 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001491 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001492 * Still allow IANA code and user code which were coming
1493 * from the previous version of the XML-1.0 specification
1494 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001495 */
1496 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001497 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001498 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1499 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001500 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001501 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001502 nxt = cur;
1503 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1504 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1505 nxt++;
1506 if (nxt - cur >= 4) {
1507 /*
1508 * Reserved
1509 */
1510 if ((nxt - cur > 8) || (nxt[0] != 0))
1511 return(0);
1512 return(1);
1513 }
1514 if (nxt - cur < 2)
1515 return(0);
1516 /* we got an ISO 639 code */
1517 if (nxt[0] == 0)
1518 return(1);
1519 if (nxt[0] != '-')
1520 return(0);
1521
1522 nxt++;
1523 cur = nxt;
1524 /* now we can have extlang or script or region or variant */
1525 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1526 goto region_m49;
1527
1528 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1529 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1530 nxt++;
1531 if (nxt - cur == 4)
1532 goto script;
1533 if (nxt - cur == 2)
1534 goto region;
1535 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1536 goto variant;
1537 if (nxt - cur != 3)
1538 return(0);
1539 /* we parsed an extlang */
1540 if (nxt[0] == 0)
1541 return(1);
1542 if (nxt[0] != '-')
1543 return(0);
1544
1545 nxt++;
1546 cur = nxt;
1547 /* now we can have script or region or variant */
1548 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1549 goto region_m49;
1550
1551 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1552 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1553 nxt++;
1554 if (nxt - cur == 2)
1555 goto region;
1556 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1557 goto variant;
1558 if (nxt - cur != 4)
1559 return(0);
1560 /* we parsed a script */
1561script:
1562 if (nxt[0] == 0)
1563 return(1);
1564 if (nxt[0] != '-')
1565 return(0);
1566
1567 nxt++;
1568 cur = nxt;
1569 /* now we can have region or variant */
1570 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1571 goto region_m49;
1572
1573 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1574 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1575 nxt++;
1576
1577 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1578 goto variant;
1579 if (nxt - cur != 2)
1580 return(0);
1581 /* we parsed a region */
1582region:
1583 if (nxt[0] == 0)
1584 return(1);
1585 if (nxt[0] != '-')
1586 return(0);
1587
1588 nxt++;
1589 cur = nxt;
1590 /* now we can just have a variant */
1591 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1592 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1593 nxt++;
1594
1595 if ((nxt - cur < 5) || (nxt - cur > 8))
1596 return(0);
1597
1598 /* we parsed a variant */
1599variant:
1600 if (nxt[0] == 0)
1601 return(1);
1602 if (nxt[0] != '-')
1603 return(0);
1604 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001605 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001606
1607region_m49:
1608 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1609 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1610 nxt += 3;
1611 goto region;
1612 }
1613 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001614}
1615
Owen Taylor3473f882001-02-23 17:55:21 +00001616/************************************************************************
1617 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001618 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001619 * *
1620 ************************************************************************/
1621
Daniel Veillard8ed10722009-08-20 19:17:36 +02001622static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1623 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001624
Daniel Veillard0fb18932003-09-07 09:14:37 +00001625#ifdef SAX2
1626/**
1627 * nsPush:
1628 * @ctxt: an XML parser context
1629 * @prefix: the namespace prefix or NULL
1630 * @URL: the namespace name
1631 *
1632 * Pushes a new parser namespace on top of the ns stack
1633 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001634 * Returns -1 in case of error, -2 if the namespace should be discarded
1635 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001636 */
1637static int
1638nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1639{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001640 if (ctxt->options & XML_PARSE_NSCLEAN) {
1641 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001642 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001643 if (ctxt->nsTab[i] == prefix) {
1644 /* in scope */
1645 if (ctxt->nsTab[i + 1] == URL)
1646 return(-2);
1647 /* out of scope keep it */
1648 break;
1649 }
1650 }
1651 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001652 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1653 ctxt->nsMax = 10;
1654 ctxt->nsNr = 0;
1655 ctxt->nsTab = (const xmlChar **)
1656 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1657 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001658 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001659 ctxt->nsMax = 0;
1660 return (-1);
1661 }
1662 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001663 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001664 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001665 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1666 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1667 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001668 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001669 ctxt->nsMax /= 2;
1670 return (-1);
1671 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001672 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001673 }
1674 ctxt->nsTab[ctxt->nsNr++] = prefix;
1675 ctxt->nsTab[ctxt->nsNr++] = URL;
1676 return (ctxt->nsNr);
1677}
1678/**
1679 * nsPop:
1680 * @ctxt: an XML parser context
1681 * @nr: the number to pop
1682 *
1683 * Pops the top @nr parser prefix/namespace from the ns stack
1684 *
1685 * Returns the number of namespaces removed
1686 */
1687static int
1688nsPop(xmlParserCtxtPtr ctxt, int nr)
1689{
1690 int i;
1691
1692 if (ctxt->nsTab == NULL) return(0);
1693 if (ctxt->nsNr < nr) {
1694 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1695 nr = ctxt->nsNr;
1696 }
1697 if (ctxt->nsNr <= 0)
1698 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001699
Daniel Veillard0fb18932003-09-07 09:14:37 +00001700 for (i = 0;i < nr;i++) {
1701 ctxt->nsNr--;
1702 ctxt->nsTab[ctxt->nsNr] = NULL;
1703 }
1704 return(nr);
1705}
1706#endif
1707
1708static int
1709xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1710 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001711 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001712 int maxatts;
1713
1714 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001715 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001716 atts = (const xmlChar **)
1717 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001718 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001719 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001720 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1721 if (attallocs == NULL) goto mem_error;
1722 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001723 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001724 } else if (nr + 5 > ctxt->maxatts) {
1725 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001726 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1727 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001728 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001729 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001730 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1731 (maxatts / 5) * sizeof(int));
1732 if (attallocs == NULL) goto mem_error;
1733 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001734 ctxt->maxatts = maxatts;
1735 }
1736 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001737mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001738 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001739 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001740}
1741
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001742/**
1743 * inputPush:
1744 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001745 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001746 *
1747 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001748 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001749 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001750 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001751int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001752inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1753{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001754 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001755 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001756 if (ctxt->inputNr >= ctxt->inputMax) {
1757 ctxt->inputMax *= 2;
1758 ctxt->inputTab =
1759 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1760 ctxt->inputMax *
1761 sizeof(ctxt->inputTab[0]));
1762 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001763 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001764 xmlFreeInputStream(value);
1765 ctxt->inputMax /= 2;
1766 value = NULL;
1767 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001768 }
1769 }
1770 ctxt->inputTab[ctxt->inputNr] = value;
1771 ctxt->input = value;
1772 return (ctxt->inputNr++);
1773}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001774/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001775 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001776 * @ctxt: an XML parser context
1777 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001778 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001779 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001780 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001781 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001782xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001783inputPop(xmlParserCtxtPtr ctxt)
1784{
1785 xmlParserInputPtr ret;
1786
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001787 if (ctxt == NULL)
1788 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001789 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001790 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001791 ctxt->inputNr--;
1792 if (ctxt->inputNr > 0)
1793 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1794 else
1795 ctxt->input = NULL;
1796 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001797 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001798 return (ret);
1799}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001800/**
1801 * nodePush:
1802 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001803 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001804 *
1805 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001806 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001807 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001808 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001809int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001810nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1811{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001812 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001813 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001814 xmlNodePtr *tmp;
1815
1816 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1817 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001818 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001819 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001820 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001821 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001822 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001823 ctxt->nodeTab = tmp;
1824 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001825 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001826 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1827 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001828 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001829 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001830 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001831 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001832 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001833 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001834 ctxt->nodeTab[ctxt->nodeNr] = value;
1835 ctxt->node = value;
1836 return (ctxt->nodeNr++);
1837}
Daniel Veillard8915c152008-08-26 13:05:34 +00001838
Daniel Veillard1c732d22002-11-30 11:22:59 +00001839/**
1840 * nodePop:
1841 * @ctxt: an XML parser context
1842 *
1843 * Pops the top element node from the node stack
1844 *
1845 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001846 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001847xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001848nodePop(xmlParserCtxtPtr ctxt)
1849{
1850 xmlNodePtr ret;
1851
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001852 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001853 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001854 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001855 ctxt->nodeNr--;
1856 if (ctxt->nodeNr > 0)
1857 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1858 else
1859 ctxt->node = NULL;
1860 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001861 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001862 return (ret);
1863}
Daniel Veillarda2351322004-06-27 12:08:10 +00001864
Daniel Veillard1c732d22002-11-30 11:22:59 +00001865/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001866 * nameNsPush:
1867 * @ctxt: an XML parser context
1868 * @value: the element name
1869 * @prefix: the element prefix
1870 * @URI: the element namespace name
Elliott Hughese54f00d2021-05-13 08:13:46 -07001871 * @line: the current line number for error messages
1872 * @nsNr: the number of namespaces pushed on the namespace table
Daniel Veillarde57ec792003-09-10 10:50:59 +00001873 *
1874 * Pushes a new element name/prefix/URL on top of the name stack
1875 *
1876 * Returns -1 in case of error, the index in the stack otherwise
1877 */
1878static int
1879nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
Elliott Hughese54f00d2021-05-13 08:13:46 -07001880 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001881{
Elliott Hughese54f00d2021-05-13 08:13:46 -07001882 xmlStartTag *tag;
1883
Daniel Veillarde57ec792003-09-10 10:50:59 +00001884 if (ctxt->nameNr >= ctxt->nameMax) {
1885 const xmlChar * *tmp;
Elliott Hughese54f00d2021-05-13 08:13:46 -07001886 xmlStartTag *tmp2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001887 ctxt->nameMax *= 2;
1888 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1889 ctxt->nameMax *
1890 sizeof(ctxt->nameTab[0]));
1891 if (tmp == NULL) {
1892 ctxt->nameMax /= 2;
1893 goto mem_error;
1894 }
1895 ctxt->nameTab = tmp;
Elliott Hughese54f00d2021-05-13 08:13:46 -07001896 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1897 ctxt->nameMax *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001898 sizeof(ctxt->pushTab[0]));
1899 if (tmp2 == NULL) {
1900 ctxt->nameMax /= 2;
1901 goto mem_error;
1902 }
1903 ctxt->pushTab = tmp2;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001904 } else if (ctxt->pushTab == NULL) {
Elliott Hughese54f00d2021-05-13 08:13:46 -07001905 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001906 sizeof(ctxt->pushTab[0]));
1907 if (ctxt->pushTab == NULL)
1908 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001909 }
1910 ctxt->nameTab[ctxt->nameNr] = value;
1911 ctxt->name = value;
Elliott Hughese54f00d2021-05-13 08:13:46 -07001912 tag = &ctxt->pushTab[ctxt->nameNr];
1913 tag->prefix = prefix;
1914 tag->URI = URI;
1915 tag->line = line;
1916 tag->nsNr = nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001917 return (ctxt->nameNr++);
1918mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001919 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001920 return (-1);
1921}
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001922#ifdef LIBXML_PUSH_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001923/**
1924 * nameNsPop:
1925 * @ctxt: an XML parser context
1926 *
1927 * Pops the top element/prefix/URI name from the name stack
1928 *
1929 * Returns the name just removed
1930 */
1931static const xmlChar *
1932nameNsPop(xmlParserCtxtPtr ctxt)
1933{
1934 const xmlChar *ret;
1935
1936 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001937 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001938 ctxt->nameNr--;
1939 if (ctxt->nameNr > 0)
1940 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1941 else
1942 ctxt->name = NULL;
1943 ret = ctxt->nameTab[ctxt->nameNr];
1944 ctxt->nameTab[ctxt->nameNr] = NULL;
1945 return (ret);
1946}
Daniel Veillarda2351322004-06-27 12:08:10 +00001947#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001948
1949/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001950 * namePush:
1951 * @ctxt: an XML parser context
1952 * @value: the element name
1953 *
1954 * Pushes a new element name on top of the name stack
1955 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001956 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001957 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001958int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001959namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001960{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001961 if (ctxt == NULL) return (-1);
1962
Daniel Veillard1c732d22002-11-30 11:22:59 +00001963 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001964 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001965 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001966 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001967 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001968 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001969 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001970 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001971 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001972 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001973 }
1974 ctxt->nameTab[ctxt->nameNr] = value;
1975 ctxt->name = value;
1976 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001977mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001978 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001979 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001980}
1981/**
1982 * namePop:
1983 * @ctxt: an XML parser context
1984 *
1985 * Pops the top element name from the name stack
1986 *
1987 * Returns the name just removed
1988 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001989const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001990namePop(xmlParserCtxtPtr ctxt)
1991{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001992 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001993
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001994 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1995 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001996 ctxt->nameNr--;
1997 if (ctxt->nameNr > 0)
1998 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1999 else
2000 ctxt->name = NULL;
2001 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00002002 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00002003 return (ret);
2004}
Owen Taylor3473f882001-02-23 17:55:21 +00002005
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002006static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002007 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00002008 int *tmp;
2009
Owen Taylor3473f882001-02-23 17:55:21 +00002010 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00002011 tmp = (int *) xmlRealloc(ctxt->spaceTab,
2012 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
2013 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002014 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002015 ctxt->spaceMax /=2;
2016 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002017 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00002018 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002019 }
2020 ctxt->spaceTab[ctxt->spaceNr] = val;
2021 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
2022 return(ctxt->spaceNr++);
2023}
2024
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002025static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00002026 int ret;
2027 if (ctxt->spaceNr <= 0) return(0);
2028 ctxt->spaceNr--;
2029 if (ctxt->spaceNr > 0)
2030 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
2031 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00002032 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00002033 ret = ctxt->spaceTab[ctxt->spaceNr];
2034 ctxt->spaceTab[ctxt->spaceNr] = -1;
2035 return(ret);
2036}
2037
2038/*
2039 * Macros for accessing the content. Those should be used only by the parser,
2040 * and not exported.
2041 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
2044 *
2045 * CUR_PTR return the current pointer to the xmlChar to be parsed.
2046 * To be used with extreme caution since operations consuming
2047 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
2052 * RAW same as CUR but in the input buffer, bypass any token
2053 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
2056 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00002057 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00002060 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2061 *
2062 * NEXT Skip to the next character, this does the proper decoding
2063 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00002064 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00002065 * CUR_CHAR(l) returns the current unicode character (int), set l
2066 * to the number of xmlChars used for the encoding [0-5].
2067 * CUR_SCHAR same but operate on a string instead of the context
2068 * COPY_BUF copy the current unicode char to the target buffer, increment
2069 * the index
2070 * GROW, SHRINK handling of input buffers
2071 */
2072
/* current byte of the current input; RAW and CUR expand identically */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* byte located (val) positions after the current one */
#define NXT(val) ctxt->input->cur[(val)]
/* current position in, and start of, the current input buffer */
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base
Owen Taylor3473f882001-02-23 17:55:21 +00002078
/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes at s, read as
 * unsigned char, are equal to c1 ... cn in that order. Each macro builds
 * on the previous one, so bytes are compared left to right and the
 * comparison stops at the first mismatch.
 *
 * Every argument is parenthesized so that expression arguments (pointer
 * arithmetic, conditional expressions, ...) are cast and compared as a
 * whole. Note that s is still evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2096
/*
 * SKIP(val): advance the current input position and column by (val),
 * then ask for more input if the new current byte is 0.
 * (val) is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2102
/*
 * SKIPL(val): advance the current input position by (val) bytes one at a
 * time, keeping line/col up to date: a '\n' increments the line and
 * resets the column to 1, any other byte increments the column. More
 * input is requested afterwards if the new current byte is 0.
 *
 * (val) is parenthesized so that an expression argument is compared as a
 * whole against the loop counter; it is evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2114
Daniel Veillarda880b122003-04-21 21:36:41 +00002115#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002116 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2117 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002118 xmlSHRINK (ctxt);
2119
2120static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2121 xmlParserInputShrink(ctxt->input);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002122 if (*ctxt->input->cur == 0)
2123 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2124}
Owen Taylor3473f882001-02-23 17:55:21 +00002125
Daniel Veillarda880b122003-04-21 21:36:41 +00002126#define GROW if ((ctxt->progressive == 0) && \
2127 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002128 xmlGROW (ctxt);
2129
2130static void xmlGROW (xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002131 ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2132 ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
Longstreth Jon190a0b82014-02-06 10:58:17 +01002133
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002134 if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2135 (curBase > XML_MAX_LOOKUP_LIMIT)) &&
Vlad Tsyrklevich28f52fe2017-08-10 15:08:48 -07002136 ((ctxt->input->buf) &&
2137 (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002138 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2139 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002140 xmlHaltParser(ctxt);
2141 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002142 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002143 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002144 if ((ctxt->input->cur > ctxt->input->end) ||
2145 (ctxt->input->cur < ctxt->input->base)) {
2146 xmlHaltParser(ctxt);
2147 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2148 return;
2149 }
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002150 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2151 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillarda880b122003-04-21 21:36:41 +00002152}
Owen Taylor3473f882001-02-23 17:55:21 +00002153
/*
 * Cursor helper macros. All of them expect a variable named "ctxt"
 * (the parser context) to be in scope at the point of use.
 */

/* Skip blanks at the current position; evaluates to the count skipped. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor and the column by exactly one, then grow the
 * input if the cursor landed on a NUL byte. The line counter is not
 * touched. The macro is a bare compound statement (not do/while), which
 * is kept as is for existing call sites.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): update line/column according to the byte under the cursor
 * (a '\n' starts a new line, anything else counts as one column), then
 * advance the cursor by @l bytes.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
  } while (0)

/* Current character from the input; its byte length is stored in @l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Current character from string @s; its byte length is stored in @l. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append value @v to buffer @b at index @i.
 * When @l is 1 the value is stored as a single xmlChar and @i is
 * incremented; otherwise xmlCopyCharMultiByte() writes it and @i is
 * advanced by the count it returns.
 * Every argument is parenthesized so compound expressions expand as
 * intended; @i must be an lvalue and is evaluated more than once.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002178
2179/**
2180 * xmlSkipBlankChars:
2181 * @ctxt: the XML parser context
2182 *
2183 * skip all blanks character found at that point in the input streams.
2184 * It pops up finished entities in the process if allowable at that point.
2185 *
2186 * Returns the number of space chars skipped
2187 */
2188
2189int
2190xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002191 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002192
2193 /*
2194 * It's Okay to use CUR/NEXT here since all the blanks are on
2195 * the ASCII range.
2196 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002197 if (ctxt->instate != XML_PARSER_DTD) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002198 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002199 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002200 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002201 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002202 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002203 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002204 if (*cur == '\n') {
2205 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002206 } else {
2207 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002208 }
2209 cur++;
2210 res++;
2211 if (*cur == 0) {
2212 ctxt->input->cur = cur;
2213 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2214 cur = ctxt->input->cur;
2215 }
2216 }
2217 ctxt->input->cur = cur;
2218 } else {
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002219 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2220
2221 while (1) {
2222 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002223 NEXT;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002224 } else if (CUR == '%') {
2225 /*
2226 * Need to handle support of entities branching here
2227 */
2228 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2229 break;
2230 xmlParsePEReference(ctxt);
2231 } else if (CUR == 0) {
2232 if (ctxt->inputNr <= 1)
2233 break;
2234 xmlPopInput(ctxt);
2235 } else {
2236 break;
2237 }
Nick Wellnhofer872fea92017-06-19 00:24:12 +02002238
2239 /*
2240 * Also increase the counter when entering or exiting a PERef.
2241 * The spec says: "When a parameter-entity reference is recognized
2242 * in the DTD and included, its replacement text MUST be enlarged
2243 * by the attachment of one leading and one following space (#x20)
2244 * character."
2245 */
2246 res++;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002247 }
Daniel Veillard02141ea2001-04-30 11:46:40 +00002248 }
Owen Taylor3473f882001-02-23 17:55:21 +00002249 return(res);
2250}
2251
2252/************************************************************************
2253 * *
2254 * Commodity functions to handle entities *
2255 * *
2256 ************************************************************************/
2257
2258/**
2259 * xmlPopInput:
2260 * @ctxt: an XML parser context
2261 *
2262 * xmlPopInput: the current input pointed by ctxt->input came to an end
2263 * pop it and return the next char.
2264 *
2265 * Returns the current xmlChar in the parser context
2266 */
2267xmlChar
2268xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002269 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002270 if (xmlParserDebugEntities)
2271 xmlGenericError(xmlGenericErrorContext,
2272 "Popping input %d\n", ctxt->inputNr);
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02002273 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2274 (ctxt->instate != XML_PARSER_EOF))
2275 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2276 "Unfinished entity outside the DTD");
Owen Taylor3473f882001-02-23 17:55:21 +00002277 xmlFreeInputStream(inputPop(ctxt));
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002278 if (*ctxt->input->cur == 0)
2279 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00002280 return(CUR);
2281}
2282
2283/**
2284 * xmlPushInput:
2285 * @ctxt: an XML parser context
2286 * @input: an XML parser input fragment (entity, XML fragment ...).
2287 *
2288 * xmlPushInput: switch to a new input stream which is stacked on top
2289 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002290 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002291 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002292int
Owen Taylor3473f882001-02-23 17:55:21 +00002293xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002294 int ret;
2295 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002296
2297 if (xmlParserDebugEntities) {
2298 if ((ctxt->input != NULL) && (ctxt->input->filename))
2299 xmlGenericError(xmlGenericErrorContext,
2300 "%s(%d): ", ctxt->input->filename,
2301 ctxt->input->line);
2302 xmlGenericError(xmlGenericErrorContext,
2303 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2304 }
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02002305 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2306 (ctxt->inputNr > 1024)) {
2307 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2308 while (ctxt->inputNr > 1)
2309 xmlFreeInputStream(inputPop(ctxt));
2310 return(-1);
2311 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002312 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002313 if (ctxt->instate == XML_PARSER_EOF)
2314 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002315 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002316 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002317}
2318
2319/**
2320 * xmlParseCharRef:
2321 * @ctxt: an XML parser context
2322 *
2323 * parse Reference declarations
2324 *
2325 * [66] CharRef ::= '&#' [0-9]+ ';' |
2326 * '&#x' [0-9a-fA-F]+ ';'
2327 *
2328 * [ WFC: Legal Character ]
2329 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002330 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002331 *
2332 * Returns the value parsed (as an int), 0 in case of error
2333 */
2334int
2335xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002336 int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002337 int count = 0;
2338
Owen Taylor3473f882001-02-23 17:55:21 +00002339 /*
2340 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2341 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002342 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002343 (NXT(2) == 'x')) {
2344 SKIP(3);
2345 GROW;
2346 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002347 if (count++ > 20) {
2348 count = 0;
2349 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002350 if (ctxt->instate == XML_PARSER_EOF)
2351 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002352 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002353 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002354 val = val * 16 + (CUR - '0');
2355 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2356 val = val * 16 + (CUR - 'a') + 10;
2357 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2358 val = val * 16 + (CUR - 'A') + 10;
2359 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002360 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002361 val = 0;
2362 break;
2363 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002364 if (val > 0x110000)
2365 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002366
Owen Taylor3473f882001-02-23 17:55:21 +00002367 NEXT;
2368 count++;
2369 }
2370 if (RAW == ';') {
2371 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002372 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002373 ctxt->input->cur++;
2374 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002375 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002376 SKIP(2);
2377 GROW;
2378 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002379 if (count++ > 20) {
2380 count = 0;
2381 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002382 if (ctxt->instate == XML_PARSER_EOF)
2383 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002384 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002385 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002386 val = val * 10 + (CUR - '0');
2387 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002388 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002389 val = 0;
2390 break;
2391 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002392 if (val > 0x110000)
2393 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002394
Owen Taylor3473f882001-02-23 17:55:21 +00002395 NEXT;
2396 count++;
2397 }
2398 if (RAW == ';') {
2399 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002400 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002401 ctxt->input->cur++;
2402 }
2403 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002404 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002405 }
2406
2407 /*
2408 * [ WFC: Legal Character ]
2409 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002410 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002411 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002412 if (val >= 0x110000) {
2413 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2414 "xmlParseCharRef: character reference out of bounds\n",
2415 val);
2416 } else if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002417 return(val);
2418 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002419 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2420 "xmlParseCharRef: invalid xmlChar value %d\n",
2421 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002422 }
2423 return(0);
2424}
2425
2426/**
2427 * xmlParseStringCharRef:
2428 * @ctxt: an XML parser context
2429 * @str: a pointer to an index in the string
2430 *
2431 * parse Reference declarations, variant parsing from a string rather
2432 * than an an input flow.
2433 *
2434 * [66] CharRef ::= '&#' [0-9]+ ';' |
2435 * '&#x' [0-9a-fA-F]+ ';'
2436 *
2437 * [ WFC: Legal Character ]
2438 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002439 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002440 *
2441 * Returns the value parsed (as an int), 0 in case of error, str will be
2442 * updated to the current value of the index
2443 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002444static int
Owen Taylor3473f882001-02-23 17:55:21 +00002445xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2446 const xmlChar *ptr;
2447 xmlChar cur;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002448 int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002449
2450 if ((str == NULL) || (*str == NULL)) return(0);
2451 ptr = *str;
2452 cur = *ptr;
2453 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2454 ptr += 3;
2455 cur = *ptr;
2456 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002457 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002458 val = val * 16 + (cur - '0');
2459 else if ((cur >= 'a') && (cur <= 'f'))
2460 val = val * 16 + (cur - 'a') + 10;
2461 else if ((cur >= 'A') && (cur <= 'F'))
2462 val = val * 16 + (cur - 'A') + 10;
2463 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002464 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002465 val = 0;
2466 break;
2467 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002468 if (val > 0x110000)
2469 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002470
Owen Taylor3473f882001-02-23 17:55:21 +00002471 ptr++;
2472 cur = *ptr;
2473 }
2474 if (cur == ';')
2475 ptr++;
2476 } else if ((cur == '&') && (ptr[1] == '#')){
2477 ptr += 2;
2478 cur = *ptr;
2479 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002480 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002481 val = val * 10 + (cur - '0');
2482 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002483 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002484 val = 0;
2485 break;
2486 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002487 if (val > 0x110000)
2488 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002489
Owen Taylor3473f882001-02-23 17:55:21 +00002490 ptr++;
2491 cur = *ptr;
2492 }
2493 if (cur == ';')
2494 ptr++;
2495 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002496 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002497 return(0);
2498 }
2499 *str = ptr;
2500
2501 /*
2502 * [ WFC: Legal Character ]
2503 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002504 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002505 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002506 if (val >= 0x110000) {
2507 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2508 "xmlParseStringCharRef: character reference out of bounds\n",
2509 val);
2510 } else if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002511 return(val);
2512 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002513 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2514 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2515 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002516 }
2517 return(0);
2518}
2519
2520/**
2521 * xmlParserHandlePEReference:
2522 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002523 *
Owen Taylor3473f882001-02-23 17:55:21 +00002524 * [69] PEReference ::= '%' Name ';'
2525 *
2526 * [ WFC: No Recursion ]
2527 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002528 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002529 *
2530 * [ WFC: Entity Declared ]
2531 * In a document without any DTD, a document with only an internal DTD
2532 * subset which contains no parameter entity references, or a document
2533 * with "standalone='yes'", ... ... The declaration of a parameter
2534 * entity must precede any reference to it...
2535 *
2536 * [ VC: Entity Declared ]
2537 * In a document with an external subset or external parameter entities
2538 * with "standalone='no'", ... ... The declaration of a parameter entity
2539 * must precede any reference to it...
2540 *
2541 * [ WFC: In DTD ]
2542 * Parameter-entity references may only appear in the DTD.
2543 * NOTE: misleading but this is handled.
2544 *
2545 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002546 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002547 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002548 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002549 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002550 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002551 */
2552void
2553xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00002554 switch(ctxt->instate) {
2555 case XML_PARSER_CDATA_SECTION:
2556 return;
2557 case XML_PARSER_COMMENT:
2558 return;
2559 case XML_PARSER_START_TAG:
2560 return;
2561 case XML_PARSER_END_TAG:
2562 return;
2563 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002564 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002565 return;
2566 case XML_PARSER_PROLOG:
2567 case XML_PARSER_START:
2568 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002569 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002570 return;
2571 case XML_PARSER_ENTITY_DECL:
2572 case XML_PARSER_CONTENT:
2573 case XML_PARSER_ATTRIBUTE_VALUE:
2574 case XML_PARSER_PI:
2575 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002576 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002577 /* we just ignore it there */
2578 return;
2579 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002580 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002581 return;
2582 case XML_PARSER_ENTITY_VALUE:
2583 /*
2584 * NOTE: in the case of entity values, we don't do the
2585 * substitution here since we need the literal
2586 * entity value to be able to save the internal
2587 * subset of the document.
2588 * This will be handled by xmlStringDecodeEntities
2589 */
2590 return;
2591 case XML_PARSER_DTD:
2592 /*
2593 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2594 * In the internal DTD subset, parameter-entity references
2595 * can occur only where markup declarations can occur, not
2596 * within markup declarations.
2597 * In that case this is handled in xmlParseMarkupDecl
2598 */
2599 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2600 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002601 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002602 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002603 break;
2604 case XML_PARSER_IGNORE:
2605 return;
2606 }
2607
Nick Wellnhofer03904152017-06-05 21:16:00 +02002608 xmlParsePEReference(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00002609}
2610
/*
 * Macro used to grow the current buffer: the new size is twice the
 * current size plus @n.
 * buffer##_size is expected to be a size_t variable in scope
 * mem_error: is expected to be a label in the enclosing function that
 *            handles memory allocation failures; it is also the target
 *            when the new size wraps around. On failure @buffer and
 *            buffer##_size are left unchanged.
 * @n is parenthesized so that compound expressions are added as a whole.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2625
2626/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002627 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002628 * @ctxt: the parser context
2629 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002630 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002631 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2632 * @end: an end marker xmlChar, 0 if none
2633 * @end2: an end marker xmlChar, 0 if none
2634 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002635 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002636 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002637 *
2638 * [67] Reference ::= EntityRef | CharRef
2639 *
2640 * [69] PEReference ::= '%' Name ';'
2641 *
2642 * Returns A newly allocated string with the substitution done. The caller
2643 * must deallocate it !
2644 */
2645xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002646xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2647 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002648 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002649 size_t buffer_size = 0;
2650 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002651
2652 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002653 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002654 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002655 xmlEntityPtr ent;
2656 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002657
Daniel Veillarda82b1822004-11-08 16:24:57 +00002658 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002659 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002660 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002661
Daniel Veillard0161e632008-08-28 15:36:32 +00002662 if (((ctxt->depth > 40) &&
2663 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2664 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002665 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002666 return(NULL);
2667 }
2668
2669 /*
2670 * allocate a translation buffer.
2671 */
2672 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002673 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002674 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002675
2676 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002677 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002678 * we are operating on already parsed values.
2679 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002680 if (str < last)
2681 c = CUR_SCHAR(str, l);
2682 else
2683 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002684 while ((c != 0) && (c != end) && /* non input consuming loop */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002685 (c != end2) && (c != end3) &&
2686 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002687
2688 if (c == 0) break;
2689 if ((c == '&') && (str[1] == '#')) {
2690 int val = xmlParseStringCharRef(ctxt, &str);
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002691 if (val == 0)
2692 goto int_error;
2693 COPY_BUF(0,buffer,nbchars,val);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002694 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002695 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002696 }
Owen Taylor3473f882001-02-23 17:55:21 +00002697 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2698 if (xmlParserDebugEntities)
2699 xmlGenericError(xmlGenericErrorContext,
2700 "String decoding Entity Reference: %.30s\n",
2701 str);
2702 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002703 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002704 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002705 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002706 if ((ent != NULL) &&
2707 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2708 if (ent->content != NULL) {
2709 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002710 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002711 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002712 }
Owen Taylor3473f882001-02-23 17:55:21 +00002713 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002714 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2715 "predefined entity has no content\n");
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002716 goto int_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002717 }
2718 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002719 ctxt->depth++;
2720 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2721 0, 0, 0);
2722 ctxt->depth--;
Elliott Hughes5cefca72021-05-06 13:23:15 -07002723 if (rep == NULL) {
2724 ent->content[0] = 0;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002725 goto int_error;
Elliott Hughes5cefca72021-05-06 13:23:15 -07002726 }
Daniel Veillard0161e632008-08-28 15:36:32 +00002727
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002728 current = rep;
2729 while (*current != 0) { /* non input consuming loop */
2730 buffer[nbchars++] = *current++;
2731 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2732 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2733 goto int_error;
2734 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2735 }
2736 }
2737 xmlFree(rep);
2738 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002739 } else if (ent != NULL) {
2740 int i = xmlStrlen(ent->name);
2741 const xmlChar *cur = ent->name;
2742
2743 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002744 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002745 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002746 }
2747 for (;i > 0;i--)
2748 buffer[nbchars++] = *cur++;
2749 buffer[nbchars++] = ';';
2750 }
2751 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2752 if (xmlParserDebugEntities)
2753 xmlGenericError(xmlGenericErrorContext,
2754 "String decoding PE Reference: %.30s\n", str);
2755 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002756 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002757 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002758 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002759 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002760 if (ent->content == NULL) {
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002761 /*
2762 * Note: external parsed entities will not be loaded,
2763 * it is not required for a non-validating parser to
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002764 * complete external PEReferences coming from the
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002765 * internal subset
2766 */
2767 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2768 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2769 (ctxt->validate != 0)) {
2770 xmlLoadEntityContent(ctxt, ent);
2771 } else {
2772 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2773 "not validating will not read content for PE entity %s\n",
2774 ent->name, NULL);
2775 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002776 }
Owen Taylor3473f882001-02-23 17:55:21 +00002777 ctxt->depth++;
2778 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2779 0, 0, 0);
2780 ctxt->depth--;
Elliott Hughes5cefca72021-05-06 13:23:15 -07002781 if (rep == NULL) {
2782 if (ent->content != NULL)
2783 ent->content[0] = 0;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002784 goto int_error;
Elliott Hughes5cefca72021-05-06 13:23:15 -07002785 }
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002786 current = rep;
2787 while (*current != 0) { /* non input consuming loop */
2788 buffer[nbchars++] = *current++;
2789 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2791 goto int_error;
2792 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2793 }
2794 }
2795 xmlFree(rep);
2796 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002797 }
2798 } else {
2799 COPY_BUF(l,buffer,nbchars,c);
2800 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002801 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2802 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002803 }
2804 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002805 if (str < last)
2806 c = CUR_SCHAR(str, l);
2807 else
2808 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002809 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002810 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002811 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002812
2813mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002814 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002815int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002816 if (rep != NULL)
2817 xmlFree(rep);
2818 if (buffer != NULL)
2819 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002820 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002821}
2822
Daniel Veillarde57ec792003-09-10 10:50:59 +00002823/**
2824 * xmlStringDecodeEntities:
2825 * @ctxt: the parser context
2826 * @str: the input string
2827 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2828 * @end: an end marker xmlChar, 0 if none
2829 * @end2: an end marker xmlChar, 0 if none
2830 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002831 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002832 * Takes a entity string content and process to do the adequate substitutions.
2833 *
2834 * [67] Reference ::= EntityRef | CharRef
2835 *
2836 * [69] PEReference ::= '%' Name ';'
2837 *
2838 * Returns A newly allocated string with the substitution done. The caller
2839 * must deallocate it !
2840 */
2841xmlChar *
2842xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2843 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002844 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002845 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2846 end, end2, end3));
2847}
Owen Taylor3473f882001-02-23 17:55:21 +00002848
2849/************************************************************************
2850 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002851 * Commodity functions, cleanup needed ? *
2852 * *
2853 ************************************************************************/
2854
2855/**
2856 * areBlanks:
2857 * @ctxt: an XML parser context
2858 * @str: a xmlChar *
2859 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002860 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002861 *
2862 * Is this a sequence of blank chars that one can ignore ?
2863 *
2864 * Returns 1 if ignorable 0 otherwise.
2865 */
2866
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002867static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2868 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002869 int i, ret;
2870 xmlNodePtr lastChild;
2871
Daniel Veillard05c13a22001-09-09 08:38:09 +00002872 /*
2873 * Don't spend time trying to differentiate them, the same callback is
2874 * used !
2875 */
2876 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002877 return(0);
2878
Owen Taylor3473f882001-02-23 17:55:21 +00002879 /*
2880 * Check for xml:space value.
2881 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002882 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2883 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002884 return(0);
2885
2886 /*
2887 * Check that the string is made of blanks
2888 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002889 if (blank_chars == 0) {
2890 for (i = 0;i < len;i++)
2891 if (!(IS_BLANK_CH(str[i]))) return(0);
2892 }
Owen Taylor3473f882001-02-23 17:55:21 +00002893
2894 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002895 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002896 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002897 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002898 if (ctxt->myDoc != NULL) {
2899 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2900 if (ret == 0) return(1);
2901 if (ret == 1) return(0);
2902 }
2903
2904 /*
2905 * Otherwise, heuristic :-\
2906 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002907 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002908 if ((ctxt->node->children == NULL) &&
2909 (RAW == '<') && (NXT(1) == '/')) return(0);
2910
2911 lastChild = xmlGetLastChild(ctxt->node);
2912 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002913 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2914 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002915 } else if (xmlNodeIsText(lastChild))
2916 return(0);
2917 else if ((ctxt->node->children != NULL) &&
2918 (xmlNodeIsText(ctxt->node->children)))
2919 return(0);
2920 return(1);
2921}
2922
Owen Taylor3473f882001-02-23 17:55:21 +00002923/************************************************************************
2924 * *
2925 * Extra stuff for namespace support *
2926 * Relates to http://www.w3.org/TR/WD-xml-names *
2927 * *
2928 ************************************************************************/
2929
2930/**
2931 * xmlSplitQName:
2932 * @ctxt: an XML parser context
2933 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002934 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002935 *
2936 * parse an UTF8 encoded XML qualified name string
2937 *
2938 * [NS 5] QName ::= (Prefix ':')? LocalPart
2939 *
2940 * [NS 6] Prefix ::= NCName
2941 *
2942 * [NS 7] LocalPart ::= NCName
2943 *
2944 * Returns the local part, and prefix is updated
2945 * to get the Prefix if any.
2946 */
2947
2948xmlChar *
2949xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2950 xmlChar buf[XML_MAX_NAMELEN + 5];
2951 xmlChar *buffer = NULL;
2952 int len = 0;
2953 int max = XML_MAX_NAMELEN;
2954 xmlChar *ret = NULL;
2955 const xmlChar *cur = name;
2956 int c;
2957
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002958 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002959 *prefix = NULL;
2960
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002961 if (cur == NULL) return(NULL);
2962
Owen Taylor3473f882001-02-23 17:55:21 +00002963#ifndef XML_XML_NAMESPACE
2964 /* xml: prefix is not really a namespace */
2965 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2966 (cur[2] == 'l') && (cur[3] == ':'))
2967 return(xmlStrdup(name));
2968#endif
2969
Daniel Veillard597bc482003-07-24 16:08:28 +00002970 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002971 if (cur[0] == ':')
2972 return(xmlStrdup(name));
2973
2974 c = *cur++;
2975 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2976 buf[len++] = c;
2977 c = *cur++;
2978 }
2979 if (len >= max) {
2980 /*
2981 * Okay someone managed to make a huge name, so he's ready to pay
2982 * for the processing speed.
2983 */
2984 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002985
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002986 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002987 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002988 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002989 return(NULL);
2990 }
2991 memcpy(buffer, buf, len);
2992 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2993 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002994 xmlChar *tmp;
2995
Owen Taylor3473f882001-02-23 17:55:21 +00002996 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002997 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002998 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002999 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00003000 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003001 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003002 return(NULL);
3003 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003004 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003005 }
3006 buffer[len++] = c;
3007 c = *cur++;
3008 }
3009 buffer[len] = 0;
3010 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003011
Daniel Veillard597bc482003-07-24 16:08:28 +00003012 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003013 if (buffer != NULL)
3014 xmlFree(buffer);
3015 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003016 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003017 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003018
Owen Taylor3473f882001-02-23 17:55:21 +00003019 if (buffer == NULL)
3020 ret = xmlStrndup(buf, len);
3021 else {
3022 ret = buffer;
3023 buffer = NULL;
3024 max = XML_MAX_NAMELEN;
3025 }
3026
3027
3028 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003029 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003030 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003031 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003032 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003033 }
Owen Taylor3473f882001-02-23 17:55:21 +00003034 len = 0;
3035
Daniel Veillardbb284f42002-10-16 18:02:47 +00003036 /*
3037 * Check that the first character is proper to start
3038 * a new name
3039 */
3040 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3041 ((c >= 0x41) && (c <= 0x5A)) ||
3042 (c == '_') || (c == ':'))) {
3043 int l;
3044 int first = CUR_SCHAR(cur, l);
3045
3046 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003047 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003048 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003049 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003050 }
3051 }
3052 cur++;
3053
Owen Taylor3473f882001-02-23 17:55:21 +00003054 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3055 buf[len++] = c;
3056 c = *cur++;
3057 }
3058 if (len >= max) {
3059 /*
3060 * Okay someone managed to make a huge name, so he's ready to pay
3061 * for the processing speed.
3062 */
3063 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003064
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003065 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003066 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003067 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003068 return(NULL);
3069 }
3070 memcpy(buffer, buf, len);
3071 while (c != 0) { /* tested bigname2.xml */
3072 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003073 xmlChar *tmp;
3074
Owen Taylor3473f882001-02-23 17:55:21 +00003075 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003076 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003077 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003078 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003079 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003080 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003081 return(NULL);
3082 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003083 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003084 }
3085 buffer[len++] = c;
3086 c = *cur++;
3087 }
3088 buffer[len] = 0;
3089 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003090
Owen Taylor3473f882001-02-23 17:55:21 +00003091 if (buffer == NULL)
3092 ret = xmlStrndup(buf, len);
3093 else {
3094 ret = buffer;
3095 }
3096 }
3097
3098 return(ret);
3099}
3100
3101/************************************************************************
3102 * *
3103 * The parser itself *
3104 * Relates to http://www.w3.org/TR/REC-xml *
3105 * *
3106 ************************************************************************/
3107
Daniel Veillard34e3f642008-07-29 09:02:27 +00003108/************************************************************************
3109 * *
3110 * Routines to parse Name, NCName and NmToken *
3111 * *
3112 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled in when DEBUG
 * is defined. Each one is incremented by the routine it is named after
 * (nbParseNmToken presumably by xmlParseNmtoken, to be confirmed).
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3121
Daniel Veillard34e3f642008-07-29 09:02:27 +00003122/*
3123 * The two following functions are related to the change of accepted
3124 * characters for Name and NmToken in the Revision 5 of XML-1.0
3125 * They correspond to the modified production [4] and the new production [4a]
3126 * changes in that revision. Also note that the macros used for the
3127 * productions Letter, Digit, CombiningChar and Extender are not needed
3128 * anymore.
3129 * We still keep compatibility to pre-revision5 parsing semantic if the
3130 * new XML_PARSE_OLD10 option is given to the parser.
3131 */
3132static int
3133xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3134 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3135 /*
3136 * Use the new checks of production [4] [4a] amd [5] of the
3137 * Update 5 of XML-1.0
3138 */
3139 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3140 (((c >= 'a') && (c <= 'z')) ||
3141 ((c >= 'A') && (c <= 'Z')) ||
3142 (c == '_') || (c == ':') ||
3143 ((c >= 0xC0) && (c <= 0xD6)) ||
3144 ((c >= 0xD8) && (c <= 0xF6)) ||
3145 ((c >= 0xF8) && (c <= 0x2FF)) ||
3146 ((c >= 0x370) && (c <= 0x37D)) ||
3147 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3148 ((c >= 0x200C) && (c <= 0x200D)) ||
3149 ((c >= 0x2070) && (c <= 0x218F)) ||
3150 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3151 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3152 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3153 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3154 ((c >= 0x10000) && (c <= 0xEFFFF))))
3155 return(1);
3156 } else {
3157 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3158 return(1);
3159 }
3160 return(0);
3161}
3162
3163static int
3164xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3165 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3166 /*
3167 * Use the new checks of production [4] [4a] amd [5] of the
3168 * Update 5 of XML-1.0
3169 */
3170 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3171 (((c >= 'a') && (c <= 'z')) ||
3172 ((c >= 'A') && (c <= 'Z')) ||
3173 ((c >= '0') && (c <= '9')) || /* !start */
3174 (c == '_') || (c == ':') ||
3175 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3176 ((c >= 0xC0) && (c <= 0xD6)) ||
3177 ((c >= 0xD8) && (c <= 0xF6)) ||
3178 ((c >= 0xF8) && (c <= 0x2FF)) ||
3179 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3180 ((c >= 0x370) && (c <= 0x37D)) ||
3181 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3182 ((c >= 0x200C) && (c <= 0x200D)) ||
3183 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3184 ((c >= 0x2070) && (c <= 0x218F)) ||
3185 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3186 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3187 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3188 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3189 ((c >= 0x10000) && (c <= 0xEFFFF))))
3190 return(1);
3191 } else {
3192 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3193 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003194 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003195 (IS_COMBINING(c)) ||
3196 (IS_EXTENDER(c)))
3197 return(1);
3198 }
3199 return(0);
3200}
3201
/*
 * Forward declaration of a file-local helper, so that code located before
 * its definition can reference it.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003204
Daniel Veillard34e3f642008-07-29 09:02:27 +00003205static const xmlChar *
3206xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3207 int len = 0, l;
3208 int c;
3209 int count = 0;
3210
Daniel Veillardc6561462009-03-25 10:22:31 +00003211#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003212 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003213#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003214
3215 /*
3216 * Handler for more complex cases
3217 */
3218 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003219 if (ctxt->instate == XML_PARSER_EOF)
3220 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003221 c = CUR_CHAR(l);
3222 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3223 /*
3224 * Use the new checks of production [4] [4a] amd [5] of the
3225 * Update 5 of XML-1.0
3226 */
3227 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3228 (!(((c >= 'a') && (c <= 'z')) ||
3229 ((c >= 'A') && (c <= 'Z')) ||
3230 (c == '_') || (c == ':') ||
3231 ((c >= 0xC0) && (c <= 0xD6)) ||
3232 ((c >= 0xD8) && (c <= 0xF6)) ||
3233 ((c >= 0xF8) && (c <= 0x2FF)) ||
3234 ((c >= 0x370) && (c <= 0x37D)) ||
3235 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3236 ((c >= 0x200C) && (c <= 0x200D)) ||
3237 ((c >= 0x2070) && (c <= 0x218F)) ||
3238 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3239 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3240 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3241 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3242 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3243 return(NULL);
3244 }
3245 len += l;
3246 NEXTL(l);
3247 c = CUR_CHAR(l);
3248 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3249 (((c >= 'a') && (c <= 'z')) ||
3250 ((c >= 'A') && (c <= 'Z')) ||
3251 ((c >= '0') && (c <= '9')) || /* !start */
3252 (c == '_') || (c == ':') ||
3253 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3254 ((c >= 0xC0) && (c <= 0xD6)) ||
3255 ((c >= 0xD8) && (c <= 0xF6)) ||
3256 ((c >= 0xF8) && (c <= 0x2FF)) ||
3257 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3258 ((c >= 0x370) && (c <= 0x37D)) ||
3259 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3260 ((c >= 0x200C) && (c <= 0x200D)) ||
3261 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3262 ((c >= 0x2070) && (c <= 0x218F)) ||
3263 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3264 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3265 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3266 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3267 ((c >= 0x10000) && (c <= 0xEFFFF))
3268 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003269 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003270 count = 0;
3271 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003272 if (ctxt->instate == XML_PARSER_EOF)
3273 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003274 }
3275 len += l;
3276 NEXTL(l);
3277 c = CUR_CHAR(l);
3278 }
3279 } else {
3280 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3281 (!IS_LETTER(c) && (c != '_') &&
3282 (c != ':'))) {
3283 return(NULL);
3284 }
3285 len += l;
3286 NEXTL(l);
3287 c = CUR_CHAR(l);
3288
3289 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3290 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3291 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003292 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003293 (IS_COMBINING(c)) ||
3294 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003295 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003296 count = 0;
3297 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003298 if (ctxt->instate == XML_PARSER_EOF)
3299 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003300 }
3301 len += l;
3302 NEXTL(l);
3303 c = CUR_CHAR(l);
3304 }
3305 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003306 if ((len > XML_MAX_NAME_LENGTH) &&
3307 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3308 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3309 return(NULL);
3310 }
Nick Wellnhofere2663052017-06-05 15:37:17 +02003311 if (ctxt->input->cur - ctxt->input->base < len) {
3312 /*
3313 * There were a couple of bugs where PERefs lead to to a change
3314 * of the buffer. Check the buffer size to avoid passing an invalid
3315 * pointer to xmlDictLookup.
3316 */
3317 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3318 "unexpected change of input buffer");
3319 return (NULL);
3320 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003321 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3322 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3323 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3324}
3325
Owen Taylor3473f882001-02-23 17:55:21 +00003326/**
3327 * xmlParseName:
3328 * @ctxt: an XML parser context
3329 *
3330 * parse an XML name.
3331 *
3332 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3333 * CombiningChar | Extender
3334 *
3335 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3336 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003337 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003338 *
3339 * Returns the Name parsed or NULL
3340 */
3341
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003342const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003343xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003344 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003345 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003346 int count = 0;
3347
3348 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003349
Daniel Veillardc6561462009-03-25 10:22:31 +00003350#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003351 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003352#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003353
Daniel Veillard48b2f892001-02-25 16:11:03 +00003354 /*
3355 * Accelerator for simple ASCII names
3356 */
3357 in = ctxt->input->cur;
3358 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3359 ((*in >= 0x41) && (*in <= 0x5A)) ||
3360 (*in == '_') || (*in == ':')) {
3361 in++;
3362 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3363 ((*in >= 0x41) && (*in <= 0x5A)) ||
3364 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003365 (*in == '_') || (*in == '-') ||
3366 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003367 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003368 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003369 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003370 if ((count > XML_MAX_NAME_LENGTH) &&
3371 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3372 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3373 return(NULL);
3374 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003375 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003376 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003377 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003378 if (ret == NULL)
3379 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003380 return(ret);
3381 }
3382 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003383 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003384 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003385}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003386
Daniel Veillard34e3f642008-07-29 09:02:27 +00003387static const xmlChar *
3388xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3389 int len = 0, l;
3390 int c;
3391 int count = 0;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003392 size_t startPosition = 0;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003393
Daniel Veillardc6561462009-03-25 10:22:31 +00003394#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003395 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003396#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003397
3398 /*
3399 * Handler for more complex cases
3400 */
3401 GROW;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003402 startPosition = CUR_PTR - BASE_PTR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003403 c = CUR_CHAR(l);
3404 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3405 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3406 return(NULL);
3407 }
3408
3409 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3410 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003411 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003412 if ((len > XML_MAX_NAME_LENGTH) &&
3413 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3414 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3415 return(NULL);
3416 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003417 count = 0;
3418 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003419 if (ctxt->instate == XML_PARSER_EOF)
3420 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003421 }
3422 len += l;
3423 NEXTL(l);
3424 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003425 if (c == 0) {
3426 count = 0;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003427 /*
3428 * when shrinking to extend the buffer we really need to preserve
3429 * the part of the name we already parsed. Hence rolling back
Haibo Huangcfd91dc2020-07-30 23:01:33 -07003430 * by current length.
Daniel Veillard51f02b02015-09-15 16:50:32 +08003431 */
3432 ctxt->input->cur -= l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003433 GROW;
3434 if (ctxt->instate == XML_PARSER_EOF)
3435 return(NULL);
Nick Wellnhofer132af1a2018-01-08 18:48:01 +01003436 ctxt->input->cur += l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003437 c = CUR_CHAR(l);
3438 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003439 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003440 if ((len > XML_MAX_NAME_LENGTH) &&
3441 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3442 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3443 return(NULL);
3444 }
Pranjal Jumde45752d22016-03-03 11:50:34 -08003445 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003446}
3447
3448/**
3449 * xmlParseNCName:
3450 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003451 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003452 *
3453 * parse an XML name.
3454 *
3455 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3456 * CombiningChar | Extender
3457 *
3458 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3459 *
3460 * Returns the Name parsed or NULL
3461 */
3462
3463static const xmlChar *
3464xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003465 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003466 const xmlChar *ret;
3467 int count = 0;
3468
Daniel Veillardc6561462009-03-25 10:22:31 +00003469#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003470 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003471#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003472
3473 /*
3474 * Accelerator for simple ASCII names
3475 */
3476 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003477 e = ctxt->input->end;
3478 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3479 ((*in >= 0x41) && (*in <= 0x5A)) ||
3480 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003481 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003482 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3483 ((*in >= 0x41) && (*in <= 0x5A)) ||
3484 ((*in >= 0x30) && (*in <= 0x39)) ||
3485 (*in == '_') || (*in == '-') ||
3486 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003487 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003488 if (in >= e)
3489 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003490 if ((*in > 0) && (*in < 0x80)) {
3491 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003492 if ((count > XML_MAX_NAME_LENGTH) &&
3493 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3494 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3495 return(NULL);
3496 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003497 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3498 ctxt->input->cur = in;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003499 ctxt->input->col += count;
3500 if (ret == NULL) {
3501 xmlErrMemory(ctxt, NULL);
3502 }
3503 return(ret);
3504 }
3505 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003506complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003507 return(xmlParseNCNameComplex(ctxt));
3508}
3509
Daniel Veillard46de64e2002-05-29 08:21:33 +00003510/**
3511 * xmlParseNameAndCompare:
3512 * @ctxt: an XML parser context
3513 *
3514 * parse an XML name and compares for match
3515 * (specialized for endtag parsing)
3516 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003517 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3518 * and the name for mismatch
3519 */
3520
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003521static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003522xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003523 register const xmlChar *cmp = other;
3524 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003525 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003526
3527 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003528 if (ctxt->instate == XML_PARSER_EOF)
3529 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003530
Daniel Veillard46de64e2002-05-29 08:21:33 +00003531 in = ctxt->input->cur;
3532 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003533 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003534 ++cmp;
3535 }
William M. Brack76e95df2003-10-18 16:20:14 +00003536 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003537 /* success */
Haibo Huangf0a546b2020-09-01 20:28:19 -07003538 ctxt->input->col += in - ctxt->input->cur;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003539 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003540 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003541 }
3542 /* failure (or end of input buffer), check with full function */
3543 ret = xmlParseName (ctxt);
Jan Pokornýbb654fe2016-04-13 16:56:07 +02003544 /* strings coming from the dictionary direct compare possible */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003545 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003546 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003547 }
3548 return ret;
3549}
3550
Owen Taylor3473f882001-02-23 17:55:21 +00003551/**
3552 * xmlParseStringName:
3553 * @ctxt: an XML parser context
3554 * @str: a pointer to the string pointer (IN/OUT)
3555 *
3556 * parse an XML name.
3557 *
3558 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3559 * CombiningChar | Extender
3560 *
3561 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3562 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003563 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003564 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003565 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003566 * is updated to the current location in the string.
3567 */
3568
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003569static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003570xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3571 xmlChar buf[XML_MAX_NAMELEN + 5];
3572 const xmlChar *cur = *str;
3573 int len = 0, l;
3574 int c;
3575
Daniel Veillardc6561462009-03-25 10:22:31 +00003576#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003577 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003578#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003579
Owen Taylor3473f882001-02-23 17:55:21 +00003580 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003581 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003582 return(NULL);
3583 }
3584
Daniel Veillard34e3f642008-07-29 09:02:27 +00003585 COPY_BUF(l,buf,len,c);
3586 cur += l;
3587 c = CUR_SCHAR(cur, l);
3588 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003589 COPY_BUF(l,buf,len,c);
3590 cur += l;
3591 c = CUR_SCHAR(cur, l);
3592 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3593 /*
3594 * Okay someone managed to make a huge name, so he's ready to pay
3595 * for the processing speed.
3596 */
3597 xmlChar *buffer;
3598 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003599
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003600 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003601 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003602 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003603 return(NULL);
3604 }
3605 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003606 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003607 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003608 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003609
3610 if ((len > XML_MAX_NAME_LENGTH) &&
3611 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3612 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3613 xmlFree(buffer);
3614 return(NULL);
3615 }
Owen Taylor3473f882001-02-23 17:55:21 +00003616 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003617 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003618 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003619 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003620 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003621 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003622 return(NULL);
3623 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003624 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003625 }
3626 COPY_BUF(l,buffer,len,c);
3627 cur += l;
3628 c = CUR_SCHAR(cur, l);
3629 }
3630 buffer[len] = 0;
3631 *str = cur;
3632 return(buffer);
3633 }
3634 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003635 if ((len > XML_MAX_NAME_LENGTH) &&
3636 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3637 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3638 return(NULL);
3639 }
Owen Taylor3473f882001-02-23 17:55:21 +00003640 *str = cur;
3641 return(xmlStrndup(buf, len));
3642}
3643
3644/**
3645 * xmlParseNmtoken:
3646 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003647 *
Owen Taylor3473f882001-02-23 17:55:21 +00003648 * parse an XML Nmtoken.
3649 *
3650 * [7] Nmtoken ::= (NameChar)+
3651 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003652 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003653 *
3654 * Returns the Nmtoken parsed or NULL
3655 */
3656
3657xmlChar *
3658xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3659 xmlChar buf[XML_MAX_NAMELEN + 5];
3660 int len = 0, l;
3661 int c;
3662 int count = 0;
3663
Daniel Veillardc6561462009-03-25 10:22:31 +00003664#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003665 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003666#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003667
Owen Taylor3473f882001-02-23 17:55:21 +00003668 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003669 if (ctxt->instate == XML_PARSER_EOF)
3670 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003671 c = CUR_CHAR(l);
3672
Daniel Veillard34e3f642008-07-29 09:02:27 +00003673 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003674 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003675 count = 0;
3676 GROW;
3677 }
3678 COPY_BUF(l,buf,len,c);
3679 NEXTL(l);
3680 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003681 if (c == 0) {
3682 count = 0;
3683 GROW;
3684 if (ctxt->instate == XML_PARSER_EOF)
3685 return(NULL);
3686 c = CUR_CHAR(l);
3687 }
Owen Taylor3473f882001-02-23 17:55:21 +00003688 if (len >= XML_MAX_NAMELEN) {
3689 /*
3690 * Okay someone managed to make a huge token, so he's ready to pay
3691 * for the processing speed.
3692 */
3693 xmlChar *buffer;
3694 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003695
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003696 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003697 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003698 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003699 return(NULL);
3700 }
3701 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003702 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003703 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003704 count = 0;
3705 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003706 if (ctxt->instate == XML_PARSER_EOF) {
3707 xmlFree(buffer);
3708 return(NULL);
3709 }
Owen Taylor3473f882001-02-23 17:55:21 +00003710 }
3711 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003712 xmlChar *tmp;
3713
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003714 if ((max > XML_MAX_NAME_LENGTH) &&
3715 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3716 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3717 xmlFree(buffer);
3718 return(NULL);
3719 }
Owen Taylor3473f882001-02-23 17:55:21 +00003720 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003721 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003722 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003723 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003724 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003725 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003726 return(NULL);
3727 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003728 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003729 }
3730 COPY_BUF(l,buffer,len,c);
3731 NEXTL(l);
3732 c = CUR_CHAR(l);
3733 }
3734 buffer[len] = 0;
3735 return(buffer);
3736 }
3737 }
3738 if (len == 0)
3739 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003740 if ((len > XML_MAX_NAME_LENGTH) &&
3741 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3742 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3743 return(NULL);
3744 }
Owen Taylor3473f882001-02-23 17:55:21 +00003745 return(xmlStrndup(buf, len));
3746}
3747
3748/**
3749 * xmlParseEntityValue:
3750 * @ctxt: an XML parser context
3751 * @orig: if non-NULL store a copy of the original entity value
3752 *
3753 * parse a value for ENTITY declarations
3754 *
3755 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3756 * "'" ([^%&'] | PEReference | Reference)* "'"
3757 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003758 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003759 */
3760
3761xmlChar *
3762xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3763 xmlChar *buf = NULL;
3764 int len = 0;
3765 int size = XML_PARSER_BUFFER_SIZE;
3766 int c, l;
3767 xmlChar stop;
3768 xmlChar *ret = NULL;
3769 const xmlChar *cur = NULL;
3770 xmlParserInputPtr input;
3771
3772 if (RAW == '"') stop = '"';
3773 else if (RAW == '\'') stop = '\'';
3774 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003775 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003776 return(NULL);
3777 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003778 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003779 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003780 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003781 return(NULL);
3782 }
3783
3784 /*
3785 * The content of the entity definition is copied in a buffer.
3786 */
3787
3788 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3789 input = ctxt->input;
3790 GROW;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003791 if (ctxt->instate == XML_PARSER_EOF)
3792 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003793 NEXT;
3794 c = CUR_CHAR(l);
3795 /*
3796 * NOTE: 4.4.5 Included in Literal
3797 * When a parameter entity reference appears in a literal entity
3798 * value, ... a single or double quote character in the replacement
3799 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003800 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003801 * In practice it means we stop the loop only when back at parsing
3802 * the initial entity and the quote is found
3803 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003804 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3805 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003806 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003807 xmlChar *tmp;
3808
Owen Taylor3473f882001-02-23 17:55:21 +00003809 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003810 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3811 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003812 xmlErrMemory(ctxt, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003813 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003814 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003815 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003816 }
3817 COPY_BUF(l,buf,len,c);
3818 NEXTL(l);
Owen Taylor3473f882001-02-23 17:55:21 +00003819
3820 GROW;
3821 c = CUR_CHAR(l);
3822 if (c == 0) {
3823 GROW;
3824 c = CUR_CHAR(l);
3825 }
3826 }
3827 buf[len] = 0;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003828 if (ctxt->instate == XML_PARSER_EOF)
3829 goto error;
3830 if (c != stop) {
3831 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3832 goto error;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003833 }
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003834 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00003835
3836 /*
3837 * Raise problem w.r.t. '&' and '%' being used in non-entities
3838 * reference constructs. Note Charref will be handled in
3839 * xmlStringDecodeEntities()
3840 */
3841 cur = buf;
3842 while (*cur != 0) { /* non input consuming */
3843 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3844 xmlChar *name;
3845 xmlChar tmp = *cur;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003846 int nameOk = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003847
3848 cur++;
3849 name = xmlParseStringName(ctxt, &cur);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003850 if (name != NULL) {
3851 nameOk = 1;
3852 xmlFree(name);
3853 }
3854 if ((nameOk == 0) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003855 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003856 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003857 tmp);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003858 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003859 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003860 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3861 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003862 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003863 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003864 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003865 if (*cur == 0)
3866 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003867 }
3868 cur++;
3869 }
3870
3871 /*
3872 * Then PEReference entities are substituted.
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003873 *
3874 * NOTE: 4.4.7 Bypassed
3875 * When a general entity reference appears in the EntityValue in
3876 * an entity declaration, it is bypassed and left as is.
3877 * so XML_SUBSTITUTE_REF is not set here.
Owen Taylor3473f882001-02-23 17:55:21 +00003878 */
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003879 ++ctxt->depth;
3880 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3881 0, 0, 0);
3882 --ctxt->depth;
3883 if (orig != NULL) {
3884 *orig = buf;
3885 buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003886 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003887
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003888error:
3889 if (buf != NULL)
3890 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003891 return(ret);
3892}
3893
3894/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003895 * xmlParseAttValueComplex:
3896 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003897 * @attlen: if non-NULL, receives the byte length of the returned value
Haibo Huangcfd91dc2020-07-30 23:01:33 -07003898 * @normalize: whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003899 *
3900 * parse a value for an attribute, this is the fallback function
3901 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003902 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003903 *
 * The value is accumulated in a heap buffer. Literal whitespace
 * (#x20, #xD, #xA, #x9) is stored as #x20; when @normalize is non-zero,
 * leading whitespace is skipped, runs of whitespace collapse to a single
 * space and trailing spaces are stripped if the value ends in whitespace.
 * Character references and entity references are handled inline:
 * depending on ctxt->replaceEntities a general entity is either expanded
 * or copied out again as "&name;".
 *
 * A '<' in the value, an invalid character or a missing closing quote is
 * reported as a fatal error, but the text collected so far is still
 * returned. NULL is returned when the value does not start with a quote,
 * when the parser reaches XML_PARSER_EOF, when the length limits are
 * exceeded, or on allocation failure.
 *
3904 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3905 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003906static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003907xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003908 xmlChar limit = 0;
3909 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003910 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003911 size_t len = 0;
3912 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003913 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003914 xmlChar *current = NULL;
3915 xmlEntityPtr ent;
3916
    /*
     * The value must open with a double or single quote; remember which
     * one in 'limit' so the main loop can stop on the matching quote.
     */
Owen Taylor3473f882001-02-23 17:55:21 +00003917 if (NXT(0) == '"') {
3918 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3919 limit = '"';
3920 NEXT;
3921 } else if (NXT(0) == '\'') {
3922 limit = '\'';
3923 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3924 NEXT;
3925 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003926 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003927 return(NULL);
3928 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003929
Owen Taylor3473f882001-02-23 17:55:21 +00003930 /*
3931 * allocate a translation buffer.
3932 */
3933 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003934 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003935 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003936
3937 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003938 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003939 */
3940 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003941 while (((NXT(0) != limit) && /* checked */
3942 (IS_CHAR(c)) && (c != '<')) &&
3943 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003944 /*
3945 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3946 * special option is given
3947 */
3948 if ((len > XML_MAX_TEXT_LENGTH) &&
3949 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3950 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003951 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003952 goto mem_error;
3953 }
    /* '&' starts either a character reference ("&#...") or an entity reference. */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003954 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003955 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003956 if (NXT(1) == '#') {
3957 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003958
    /*
     * A character reference that yields '&' is stored as a bare '&'
     * when entities are replaced, otherwise as the text "&#38;".
     */
Owen Taylor3473f882001-02-23 17:55:21 +00003959 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003960 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003961 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003962 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003963 }
3964 buf[len++] = '&';
3965 } else {
3966 /*
3967 * The reparsing will be done in xmlStringGetNodeList()
3968 * called by the attribute() function in SAX.c
3969 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003970 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003971 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003972 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003973 buf[len++] = '&';
3974 buf[len++] = '#';
3975 buf[len++] = '3';
3976 buf[len++] = '8';
3977 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003978 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003979 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003980 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003981 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003982 }
Owen Taylor3473f882001-02-23 17:55:21 +00003983 len += xmlCopyChar(0, &buf[len], val);
3984 }
3985 } else {
3986 ent = xmlParseEntityRef(ctxt);
    /* Entity accounting: one per reference, plus ent->owner when resolved. */
Daniel Veillardcba68392008-08-29 12:43:40 +00003987 ctxt->nbentities++;
3988 if (ent != NULL)
3989 ctxt->nbentities += ent->owner;
    /*
     * Case 1: predefined entity. Copy the first byte of its content,
     * except that '&' is written as "&#38;" when entities are not
     * being replaced.
     */
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003990 if ((ent != NULL) &&
3991 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003992 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003993 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003994 }
3995 if ((ctxt->replaceEntities == 0) &&
3996 (ent->content[0] == '&')) {
3997 buf[len++] = '&';
3998 buf[len++] = '#';
3999 buf[len++] = '3';
4000 buf[len++] = '8';
4001 buf[len++] = ';';
4002 } else {
4003 buf[len++] = ent->content[0];
4004 }
    /*
     * Case 2: entities are replaced. Decode the entity content and
     * append it, turning #xD, #xA and #x9 into #x20.
     */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004005 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004006 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004007 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
Peter Simons8f30bdf2016-04-15 11:56:55 +02004008 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004009 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004010 XML_SUBSTITUTE_REF,
4011 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004012 --ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004013 if (rep != NULL) {
4014 current = rep;
4015 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004016 if ((*current == 0xD) || (*current == 0xA) ||
4017 (*current == 0x9)) {
4018 buf[len++] = 0x20;
4019 current++;
4020 } else
4021 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004022 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004023 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004024 }
4025 }
4026 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004027 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004028 }
4029 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004030 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004031 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004032 }
Owen Taylor3473f882001-02-23 17:55:21 +00004033 if (ent->content != NULL)
4034 buf[len++] = ent->content[0];
4035 }
    /*
     * Case 3: entities are not replaced. The reference is copied out
     * as "&name;"; the content is decoded only to fill ent->checked.
     */
4036 } else if (ent != NULL) {
4037 int i = xmlStrlen(ent->name);
4038 const xmlChar *cur = ent->name;
4039
4040 /*
4041 * This may look absurd but is needed to detect
4042 * entities problems
4043 */
4044 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004045 (ent->content != NULL) && (ent->checked == 0)) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004046 unsigned long oldnbent = ctxt->nbentities, diff;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004047
Peter Simons8f30bdf2016-04-15 11:56:55 +02004048 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004049 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004050 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004051 --ctxt->depth;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004052
    /*
     * ent->checked is set to twice the number of entities counted
     * during the decoding plus one (capped at INT_MAX / 2); the low
     * bit is set when the decoded text contains '<'.
     */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004053 diff = ctxt->nbentities - oldnbent + 1;
4054 if (diff > INT_MAX / 2)
4055 diff = INT_MAX / 2;
4056 ent->checked = diff * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004057 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004058 if (xmlStrchr(rep, '<'))
4059 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004060 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004061 rep = NULL;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02004062 } else {
    /* Decoding failed: the stored entity content is emptied. */
4063 ent->content[0] = 0;
4064 }
Owen Taylor3473f882001-02-23 17:55:21 +00004065 }
4066
4067 /*
4068 * Just output the reference
4069 */
4070 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004071 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004072 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004073 }
4074 for (;i > 0;i--)
4075 buf[len++] = *cur++;
4076 buf[len++] = ';';
4077 }
4078 }
4079 } else {
    /*
     * Plain character. Whitespace is stored as #x20; with 'normalize'
     * set it is skipped at the start of the value and after a previous
     * whitespace character.
     */
4080 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004081 if ((len != 0) || (!normalize)) {
4082 if ((!normalize) || (!in_space)) {
4083 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004084 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004085 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004086 }
4087 }
4088 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004089 }
4090 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004091 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004092 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004093 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004094 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004095 }
4096 }
4097 NEXTL(l);
4098 }
4099 GROW;
4100 c = CUR_CHAR(l);
4101 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004102 if (ctxt->instate == XML_PARSER_EOF)
4103 goto error;
4104
    /* When normalizing, drop the trailing spaces of a value ending in whitespace. */
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004105 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004106 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004107 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004108 buf[len] = 0;
    /*
     * Work out why the loop stopped. The error cases below only report
     * the problem; the buffer is still returned. The closing quote is
     * consumed when it is the one that ended the loop.
     */
Owen Taylor3473f882001-02-23 17:55:21 +00004109 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004110 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004111 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004112 if ((c != 0) && (!IS_CHAR(c))) {
4113 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4114 "invalid character in attribute value\n");
4115 } else {
4116 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4117 "AttValue: ' expected\n");
4118 }
Owen Taylor3473f882001-02-23 17:55:21 +00004119 } else
4120 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004121
4122 /*
4123 * There we potentially risk an overflow, don't allow attribute value of
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004124 * length more than INT_MAX it is a very reasonable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004125 */
4126 if (len >= INT_MAX) {
4127 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004128 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004129 goto mem_error;
4130 }
4131
4132 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004133 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004134
    /*
     * mem_error: reports a memory error, then falls through to the
     * common cleanup. It is also the target of the two "length too long"
     * checks above.
     * NOTE(review): growBuffer() is a macro defined outside this block;
     * it presumably jumps to mem_error on failure - confirm.
     */
4135mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004136 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004137error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004138 if (buf != NULL)
4139 xmlFree(buf);
4140 if (rep != NULL)
4141 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004142 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004143}
4144
4145/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004146 * xmlParseAttValue:
4147 * @ctxt: an XML parser context
4148 *
4149 * parse a value for an attribute
4150 * Note: the parser won't do substitution of entities here, this
4151 * will be handled later in xmlStringGetNodeList
4152 *
4153 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4154 * "'" ([^<&'] | Reference)* "'"
4155 *
4156 * 3.3.3 Attribute-Value Normalization:
4157 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004158 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004159 * - a character reference is processed by appending the referenced
4160 * character to the attribute value
4161 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004162 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004163 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4164 * appending #x20 to the normalized value, except that only a single
4165 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004166 * parsed entity or the literal entity value of an internal parsed entity
4167 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004168 * If the declared value is not CDATA, then the XML processor must further
4169 * process the normalized attribute value by discarding any leading and
4170 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004171 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004172 * All attributes for which no declaration has been read should be treated
4173 * by a non-validating parser as if declared CDATA.
4174 *
4175 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4176 */
4177
4178
4179xmlChar *
4180xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004181 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004182 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004183}
4184
4185/**
Owen Taylor3473f882001-02-23 17:55:21 +00004186 * xmlParseSystemLiteral:
4187 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004188 *
Owen Taylor3473f882001-02-23 17:55:21 +00004189 * parse an XML Literal
4190 *
4191 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4192 *
4193 * Returns the SystemLiteral parsed or NULL
4194 */
4195
4196xmlChar *
4197xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4198 xmlChar *buf = NULL;
4199 int len = 0;
4200 int size = XML_PARSER_BUFFER_SIZE;
4201 int cur, l;
4202 xmlChar stop;
4203 int state = ctxt->instate;
4204 int count = 0;
4205
4206 SHRINK;
4207 if (RAW == '"') {
4208 NEXT;
4209 stop = '"';
4210 } else if (RAW == '\'') {
4211 NEXT;
4212 stop = '\'';
4213 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004214 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004215 return(NULL);
4216 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004217
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004218 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004219 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004220 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004221 return(NULL);
4222 }
4223 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4224 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004225 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004226 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004227 xmlChar *tmp;
4228
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004229 if ((size > XML_MAX_NAME_LENGTH) &&
4230 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4231 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4232 xmlFree(buf);
4233 ctxt->instate = (xmlParserInputState) state;
4234 return(NULL);
4235 }
Owen Taylor3473f882001-02-23 17:55:21 +00004236 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004237 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4238 if (tmp == NULL) {
4239 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004240 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004241 ctxt->instate = (xmlParserInputState) state;
4242 return(NULL);
4243 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004244 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004245 }
4246 count++;
4247 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08004248 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00004249 GROW;
4250 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004251 if (ctxt->instate == XML_PARSER_EOF) {
4252 xmlFree(buf);
4253 return(NULL);
4254 }
Owen Taylor3473f882001-02-23 17:55:21 +00004255 }
4256 COPY_BUF(l,buf,len,cur);
4257 NEXTL(l);
4258 cur = CUR_CHAR(l);
4259 if (cur == 0) {
4260 GROW;
4261 SHRINK;
4262 cur = CUR_CHAR(l);
4263 }
4264 }
4265 buf[len] = 0;
4266 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004267 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004268 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004269 } else {
4270 NEXT;
4271 }
4272 return(buf);
4273}
4274
4275/**
4276 * xmlParsePubidLiteral:
4277 * @ctxt: an XML parser context
4278 *
4279 * parse an XML public literal
4280 *
4281 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4282 *
4283 * Returns the PubidLiteral parsed or NULL.
4284 */
4285
4286xmlChar *
4287xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4288 xmlChar *buf = NULL;
4289 int len = 0;
4290 int size = XML_PARSER_BUFFER_SIZE;
4291 xmlChar cur;
4292 xmlChar stop;
4293 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004294 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004295
4296 SHRINK;
4297 if (RAW == '"') {
4298 NEXT;
4299 stop = '"';
4300 } else if (RAW == '\'') {
4301 NEXT;
4302 stop = '\'';
4303 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004304 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004305 return(NULL);
4306 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004307 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004308 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004309 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004310 return(NULL);
4311 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004312 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004313 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004314 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004315 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004316 xmlChar *tmp;
4317
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004318 if ((size > XML_MAX_NAME_LENGTH) &&
4319 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4320 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4321 xmlFree(buf);
4322 return(NULL);
4323 }
Owen Taylor3473f882001-02-23 17:55:21 +00004324 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004325 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4326 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004327 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004328 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004329 return(NULL);
4330 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004331 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004332 }
4333 buf[len++] = cur;
4334 count++;
4335 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08004336 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00004337 GROW;
4338 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004339 if (ctxt->instate == XML_PARSER_EOF) {
4340 xmlFree(buf);
4341 return(NULL);
4342 }
Owen Taylor3473f882001-02-23 17:55:21 +00004343 }
4344 NEXT;
4345 cur = CUR;
4346 if (cur == 0) {
4347 GROW;
4348 SHRINK;
4349 cur = CUR;
4350 }
4351 }
4352 buf[len] = 0;
4353 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004354 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004355 } else {
4356 NEXT;
4357 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004358 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004359 return(buf);
4360}
4361
/*
 * Forward declaration: xmlParseCharData() below hands over to this
 * routine, which is defined after it.
 */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004363
/*
 * Lookup table used by the inner scanning loop of xmlParseCharData().
 * A non-zero entry means the byte can be skipped over directly by that
 * loop. Entries are zero for every byte below 0x20 except TAB (0x09),
 * for '&', '<' and ']', and for every byte >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* only 0x9; LF and CR are tested separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* '&' (0x26) stops the scan */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* '<' (0x3C) stops the scan */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ']' (0x5D) stops the scan */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ASCII bytes (0x80-0xFF) */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4401
Owen Taylor3473f882001-02-23 17:55:21 +00004402/**
4403 * xmlParseCharData:
4404 * @ctxt: an XML parser context
4405 * @cdata: int indicating whether we are within a CDATA section
4406 *
4407 * parse a CharData section.
4408 * if we are within a CDATA section ']]>' marks an end of section.
4409 *
4410 * The right angle bracket (>) may be represented using the string "&gt;",
4411 * and must, for compatibility, be escaped using "&gt;" or a character
4412 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004413 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004414 *
4415 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4416 */
4417
4418void
4419xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004420 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004421 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004422 int line = ctxt->input->line;
4423 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004424 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004425
4426 SHRINK;
4427 GROW;
4428 /*
4429 * Accelerated common case where input don't need to be
4430 * modified before passing it to the handler.
4431 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004432 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004433 in = ctxt->input->cur;
4434 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004435get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004436 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004437 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004438 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004439 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004440 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004441 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004442 goto get_more_space;
4443 }
4444 if (*in == '<') {
4445 nbchar = in - ctxt->input->cur;
4446 if (nbchar > 0) {
4447 const xmlChar *tmp = ctxt->input->cur;
4448 ctxt->input->cur = in;
4449
Daniel Veillard34099b42004-11-04 17:34:35 +00004450 if ((ctxt->sax != NULL) &&
4451 (ctxt->sax->ignorableWhitespace !=
4452 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004453 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004454 if (ctxt->sax->ignorableWhitespace != NULL)
4455 ctxt->sax->ignorableWhitespace(ctxt->userData,
4456 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004457 } else {
4458 if (ctxt->sax->characters != NULL)
4459 ctxt->sax->characters(ctxt->userData,
4460 tmp, nbchar);
4461 if (*ctxt->space == -1)
4462 *ctxt->space = -2;
4463 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004464 } else if ((ctxt->sax != NULL) &&
4465 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004466 ctxt->sax->characters(ctxt->userData,
4467 tmp, nbchar);
4468 }
4469 }
4470 return;
4471 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004472
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004473get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004474 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004475 while (test_char_data[*in]) {
4476 in++;
4477 ccol++;
4478 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004479 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004480 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004481 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004482 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004483 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004484 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004485 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004486 }
4487 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004488 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004489 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Nick Wellnhofer52ceced2017-07-01 17:49:30 +02004490 ctxt->input->cur = in + 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004491 return;
4492 }
4493 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004494 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004495 goto get_more;
4496 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004497 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004498 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004499 if ((ctxt->sax != NULL) &&
4500 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004501 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004502 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004503 const xmlChar *tmp = ctxt->input->cur;
4504 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004505
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004506 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004507 if (ctxt->sax->ignorableWhitespace != NULL)
4508 ctxt->sax->ignorableWhitespace(ctxt->userData,
4509 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004510 } else {
4511 if (ctxt->sax->characters != NULL)
4512 ctxt->sax->characters(ctxt->userData,
4513 tmp, nbchar);
4514 if (*ctxt->space == -1)
4515 *ctxt->space = -2;
4516 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004517 line = ctxt->input->line;
4518 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004519 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004520 if (ctxt->sax->characters != NULL)
4521 ctxt->sax->characters(ctxt->userData,
4522 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004523 line = ctxt->input->line;
4524 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004525 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004526 /* something really bad happened in the SAX callback */
4527 if (ctxt->instate != XML_PARSER_CONTENT)
4528 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004529 }
4530 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004531 if (*in == 0xD) {
4532 in++;
4533 if (*in == 0xA) {
4534 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004535 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004536 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004537 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004538 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004539 in--;
4540 }
4541 if (*in == '<') {
4542 return;
4543 }
4544 if (*in == '&') {
4545 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004546 }
4547 SHRINK;
4548 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004549 if (ctxt->instate == XML_PARSER_EOF)
4550 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004551 in = ctxt->input->cur;
Haibo Huangd23e46c2020-10-28 22:26:09 -07004552 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004553 nbchar = 0;
4554 }
Daniel Veillard50582112001-03-26 22:52:16 +00004555 ctxt->input->line = line;
4556 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004557 xmlParseCharDataComplex(ctxt, cdata);
4558}
4559
Daniel Veillard01c13b52002-12-10 15:19:08 +00004560/**
4561 * xmlParseCharDataComplex:
4562 * @ctxt: an XML parser context
4563 * @cdata: int indicating whether we are within a CDATA section
4564 *
4565 * parse a CharData section.this is the fallback function
4566 * of xmlParseCharData() when the parsing requires handling
4567 * of non-ASCII characters.
4568 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004569static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004570xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004571 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4572 int nbchar = 0;
4573 int cur, l;
4574 int count = 0;
4575
4576 SHRINK;
4577 GROW;
4578 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004579 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004580 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004581 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004582 if ((cur == ']') && (NXT(1) == ']') &&
4583 (NXT(2) == '>')) {
4584 if (cdata) break;
4585 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004586 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004587 }
4588 }
4589 COPY_BUF(l,buf,nbchar,cur);
4590 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004591 buf[nbchar] = 0;
4592
Owen Taylor3473f882001-02-23 17:55:21 +00004593 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004594 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004595 */
4596 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004597 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004598 if (ctxt->sax->ignorableWhitespace != NULL)
4599 ctxt->sax->ignorableWhitespace(ctxt->userData,
4600 buf, nbchar);
4601 } else {
4602 if (ctxt->sax->characters != NULL)
4603 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004604 if ((ctxt->sax->characters !=
4605 ctxt->sax->ignorableWhitespace) &&
4606 (*ctxt->space == -1))
4607 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004608 }
4609 }
4610 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004611 /* something really bad happened in the SAX callback */
4612 if (ctxt->instate != XML_PARSER_CONTENT)
4613 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004614 }
4615 count++;
4616 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08004617 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00004618 GROW;
4619 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004620 if (ctxt->instate == XML_PARSER_EOF)
4621 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004622 }
4623 NEXTL(l);
4624 cur = CUR_CHAR(l);
4625 }
4626 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004627 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004628 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004629 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004630 */
4631 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004632 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004633 if (ctxt->sax->ignorableWhitespace != NULL)
4634 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4635 } else {
4636 if (ctxt->sax->characters != NULL)
4637 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004638 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4639 (*ctxt->space == -1))
4640 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004641 }
4642 }
4643 }
Nick Wellnhofer69936b12017-08-30 14:16:01 +02004644 if ((cur != 0) && (!IS_CHAR(cur))) {
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004645 /* Generate the error and skip the offending character */
4646 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4647 "PCDATA invalid Char value %d\n",
4648 cur);
4649 NEXTL(l);
4650 }
Owen Taylor3473f882001-02-23 17:55:21 +00004651}
4652
4653/**
4654 * xmlParseExternalID:
4655 * @ctxt: an XML parser context
4656 * @publicID: a xmlChar** receiving PubidLiteral
4657 * @strict: indicate whether we should restrict parsing to only
4658 * production [75], see NOTE below
4659 *
4660 * Parse an External ID or a Public ID
4661 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004662 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004663 * 'PUBLIC' S PubidLiteral S SystemLiteral
4664 *
4665 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4666 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4667 *
4668 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4669 *
4670 * Returns the function returns SystemLiteral and in the second
4671 * case publicID receives PubidLiteral, is strict is off
4672 * it is possible to return NULL and have publicID set.
4673 */
4674
4675xmlChar *
4676xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4677 xmlChar *URI = NULL;
4678
4679 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004680
4681 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004682 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004683 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004684 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004685 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4686 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004687 }
Owen Taylor3473f882001-02-23 17:55:21 +00004688 URI = xmlParseSystemLiteral(ctxt);
4689 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004690 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004691 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004692 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004693 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004694 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004695 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004696 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004697 }
Owen Taylor3473f882001-02-23 17:55:21 +00004698 *publicID = xmlParsePubidLiteral(ctxt);
4699 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004700 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004701 }
4702 if (strict) {
4703 /*
4704 * We don't handle [83] so "S SystemLiteral" is required.
4705 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004706 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004707 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004708 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004709 }
4710 } else {
4711 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004712 * We handle [83] so we return immediately, if
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004713 * "S SystemLiteral" is not detected. We skip blanks if no
4714 * system literal was found, but this is harmless since we must
4715 * be at the end of a NotationDecl.
Owen Taylor3473f882001-02-23 17:55:21 +00004716 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004717 if (SKIP_BLANKS == 0) return(NULL);
4718 if ((CUR != '\'') && (CUR != '"')) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004719 }
Owen Taylor3473f882001-02-23 17:55:21 +00004720 URI = xmlParseSystemLiteral(ctxt);
4721 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004722 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004723 }
4724 }
4725 return(URI);
4726}
4727
4728/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004729 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004730 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004731 * @buf: the already parsed part of the buffer
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004732 * @len: number of bytes in the buffer
Daniel Veillard4c778d82005-01-23 17:37:44 +00004733 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004734 *
4735 * Skip an XML (SGML) comment <!-- .... -->
4736 * The spec says that "For compatibility, the string "--" (double-hyphen)
4737 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004738 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004739 *
4740 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4741 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004742static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004743xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4744 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004745 int q, ql;
4746 int r, rl;
4747 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004748 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004749 int inputid;
4750
4751 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004752
Owen Taylor3473f882001-02-23 17:55:21 +00004753 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004754 len = 0;
4755 size = XML_PARSER_BUFFER_SIZE;
4756 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4757 if (buf == NULL) {
4758 xmlErrMemory(ctxt, NULL);
4759 return;
4760 }
Owen Taylor3473f882001-02-23 17:55:21 +00004761 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004762 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004763 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004764 if (q == 0)
4765 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004766 if (!IS_CHAR(q)) {
4767 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4768 "xmlParseComment: invalid xmlChar value %d\n",
4769 q);
4770 xmlFree (buf);
4771 return;
4772 }
Owen Taylor3473f882001-02-23 17:55:21 +00004773 NEXTL(ql);
4774 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004775 if (r == 0)
4776 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004777 if (!IS_CHAR(r)) {
4778 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4779 "xmlParseComment: invalid xmlChar value %d\n",
4780 q);
4781 xmlFree (buf);
4782 return;
4783 }
Owen Taylor3473f882001-02-23 17:55:21 +00004784 NEXTL(rl);
4785 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004786 if (cur == 0)
4787 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004788 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004789 ((cur != '>') ||
4790 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004791 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004792 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004793 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004794 if ((len > XML_MAX_TEXT_LENGTH) &&
4795 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4796 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4797 "Comment too big found", NULL);
4798 xmlFree (buf);
4799 return;
4800 }
Owen Taylor3473f882001-02-23 17:55:21 +00004801 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004802 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004803 size_t new_size;
4804
4805 new_size = size * 2;
4806 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004807 if (new_buf == NULL) {
4808 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004809 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004810 return;
4811 }
William M. Bracka3215c72004-07-31 16:24:01 +00004812 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004813 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004814 }
4815 COPY_BUF(ql,buf,len,q);
4816 q = r;
4817 ql = rl;
4818 r = cur;
4819 rl = l;
4820
4821 count++;
4822 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08004823 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00004824 GROW;
4825 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004826 if (ctxt->instate == XML_PARSER_EOF) {
4827 xmlFree(buf);
4828 return;
4829 }
Owen Taylor3473f882001-02-23 17:55:21 +00004830 }
4831 NEXTL(l);
4832 cur = CUR_CHAR(l);
4833 if (cur == 0) {
4834 SHRINK;
4835 GROW;
4836 cur = CUR_CHAR(l);
4837 }
4838 }
4839 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004840 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004841 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004842 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004843 } else if (!IS_CHAR(cur)) {
4844 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4845 "xmlParseComment: invalid xmlChar value %d\n",
4846 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004847 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004848 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004849 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004850 "Comment doesn't start and stop in the same"
4851 " entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004852 }
4853 NEXT;
4854 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4855 (!ctxt->disableSAX))
4856 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004857 }
Daniel Veillardda629342007-08-01 07:49:06 +00004858 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004859 return;
4860not_terminated:
4861 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4862 "Comment not terminated\n", NULL);
4863 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004864 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004865}
Daniel Veillardda629342007-08-01 07:49:06 +00004866
Daniel Veillard4c778d82005-01-23 17:37:44 +00004867/**
4868 * xmlParseComment:
4869 * @ctxt: an XML parser context
4870 *
4871 * Skip an XML (SGML) comment <!-- .... -->
4872 * The spec says that "For compatibility, the string "--" (double-hyphen)
4873 * must not occur within comments. "
4874 *
4875 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4876 */
4877void
4878xmlParseComment(xmlParserCtxtPtr ctxt) {
4879 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004880 size_t size = XML_PARSER_BUFFER_SIZE;
4881 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004882 xmlParserInputState state;
4883 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004884 size_t nbchar = 0;
4885 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004886 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004887
4888 /*
4889 * Check that there is a comment right here.
4890 */
4891 if ((RAW != '<') || (NXT(1) != '!') ||
4892 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004893 state = ctxt->instate;
4894 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004895 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004896 SKIP(4);
4897 SHRINK;
4898 GROW;
4899
4900 /*
4901 * Accelerated common case where input don't need to be
4902 * modified before passing it to the handler.
4903 */
4904 in = ctxt->input->cur;
4905 do {
4906 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004907 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004908 ctxt->input->line++; ctxt->input->col = 1;
4909 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004910 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004911 }
4912get_more:
4913 ccol = ctxt->input->col;
4914 while (((*in > '-') && (*in <= 0x7F)) ||
4915 ((*in >= 0x20) && (*in < '-')) ||
4916 (*in == 0x09)) {
4917 in++;
4918 ccol++;
4919 }
4920 ctxt->input->col = ccol;
4921 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004922 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004923 ctxt->input->line++; ctxt->input->col = 1;
4924 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004925 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004926 goto get_more;
4927 }
4928 nbchar = in - ctxt->input->cur;
4929 /*
4930 * save current set of data
4931 */
4932 if (nbchar > 0) {
4933 if ((ctxt->sax != NULL) &&
4934 (ctxt->sax->comment != NULL)) {
4935 if (buf == NULL) {
4936 if ((*in == '-') && (in[1] == '-'))
4937 size = nbchar + 1;
4938 else
4939 size = XML_PARSER_BUFFER_SIZE + nbchar;
4940 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4941 if (buf == NULL) {
4942 xmlErrMemory(ctxt, NULL);
4943 ctxt->instate = state;
4944 return;
4945 }
4946 len = 0;
4947 } else if (len + nbchar + 1 >= size) {
4948 xmlChar *new_buf;
4949 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4950 new_buf = (xmlChar *) xmlRealloc(buf,
4951 size * sizeof(xmlChar));
4952 if (new_buf == NULL) {
4953 xmlFree (buf);
4954 xmlErrMemory(ctxt, NULL);
4955 ctxt->instate = state;
4956 return;
4957 }
4958 buf = new_buf;
4959 }
4960 memcpy(&buf[len], ctxt->input->cur, nbchar);
4961 len += nbchar;
4962 buf[len] = 0;
4963 }
4964 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004965 if ((len > XML_MAX_TEXT_LENGTH) &&
4966 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4967 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4968 "Comment too big found", NULL);
4969 xmlFree (buf);
4970 return;
4971 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004972 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004973 if (*in == 0xA) {
4974 in++;
4975 ctxt->input->line++; ctxt->input->col = 1;
4976 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004977 if (*in == 0xD) {
4978 in++;
4979 if (*in == 0xA) {
4980 ctxt->input->cur = in;
4981 in++;
4982 ctxt->input->line++; ctxt->input->col = 1;
4983 continue; /* while */
4984 }
4985 in--;
4986 }
4987 SHRINK;
4988 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004989 if (ctxt->instate == XML_PARSER_EOF) {
4990 xmlFree(buf);
4991 return;
4992 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004993 in = ctxt->input->cur;
4994 if (*in == '-') {
4995 if (in[1] == '-') {
4996 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004997 if (ctxt->input->id != inputid) {
4998 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004999 "comment doesn't start and stop in the"
5000 " same entity\n");
Daniel Veillard051d52c2008-07-29 16:44:59 +00005001 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005002 SKIP(3);
5003 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5004 (!ctxt->disableSAX)) {
5005 if (buf != NULL)
5006 ctxt->sax->comment(ctxt->userData, buf);
5007 else
5008 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5009 }
5010 if (buf != NULL)
5011 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08005012 if (ctxt->instate != XML_PARSER_EOF)
5013 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005014 return;
5015 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005016 if (buf != NULL) {
5017 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5018 "Double hyphen within comment: "
5019 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005020 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005021 } else
5022 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5023 "Double hyphen within comment\n", NULL);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07005024 if (ctxt->instate == XML_PARSER_EOF) {
5025 xmlFree(buf);
5026 return;
5027 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005028 in++;
5029 ctxt->input->col++;
5030 }
5031 in++;
5032 ctxt->input->col++;
5033 goto get_more;
5034 }
Haibo Huangd23e46c2020-10-28 22:26:09 -07005035 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
Daniel Veillard4c778d82005-01-23 17:37:44 +00005036 xmlParseCommentComplex(ctxt, buf, len, size);
5037 ctxt->instate = state;
5038 return;
5039}
5040
Owen Taylor3473f882001-02-23 17:55:21 +00005041
5042/**
5043 * xmlParsePITarget:
5044 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005045 *
Owen Taylor3473f882001-02-23 17:55:21 +00005046 * parse the name of a PI
5047 *
5048 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5049 *
5050 * Returns the PITarget name or NULL
5051 */
5052
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005053const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005054xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005055 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005056
5057 name = xmlParseName(ctxt);
5058 if ((name != NULL) &&
5059 ((name[0] == 'x') || (name[0] == 'X')) &&
5060 ((name[1] == 'm') || (name[1] == 'M')) &&
5061 ((name[2] == 'l') || (name[2] == 'L'))) {
5062 int i;
5063 if ((name[0] == 'x') && (name[1] == 'm') &&
5064 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005065 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005066 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005067 return(name);
5068 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005069 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005070 return(name);
5071 }
5072 for (i = 0;;i++) {
5073 if (xmlW3CPIs[i] == NULL) break;
5074 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5075 return(name);
5076 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005077 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5078 "xmlParsePITarget: invalid name prefix 'xml'\n",
5079 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005080 }
Daniel Veillard37334572008-07-31 08:20:02 +00005081 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005082 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005083 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005084 }
Owen Taylor3473f882001-02-23 17:55:21 +00005085 return(name);
5086}
5087
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005088#ifdef LIBXML_CATALOG_ENABLED
5089/**
5090 * xmlParseCatalogPI:
5091 * @ctxt: an XML parser context
5092 * @catalog: the PI value string
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005093 *
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005094 * parse an XML Catalog Processing Instruction.
5095 *
5096 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5097 *
5098 * Occurs only if allowed by the user and if happening in the Misc
Haibo Huangcfd91dc2020-07-30 23:01:33 -07005099 * part of the document before any doctype information
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005100 * This will add the given catalog to the parsing context in order
5101 * to be used if there is a resolution need further down in the document
5102 */
5103
5104static void
5105xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5106 xmlChar *URL = NULL;
5107 const xmlChar *tmp, *base;
5108 xmlChar marker;
5109
5110 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00005111 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005112 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5113 goto error;
5114 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00005115 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005116 if (*tmp != '=') {
5117 return;
5118 }
5119 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00005120 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005121 marker = *tmp;
5122 if ((marker != '\'') && (marker != '"'))
5123 goto error;
5124 tmp++;
5125 base = tmp;
5126 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5127 if (*tmp == 0)
5128 goto error;
5129 URL = xmlStrndup(base, tmp - base);
5130 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00005131 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005132 if (*tmp != 0)
5133 goto error;
5134
5135 if (URL != NULL) {
5136 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5137 xmlFree(URL);
5138 }
5139 return;
5140
5141error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00005142 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5143 "Catalog PI syntax error: %s\n",
5144 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005145 if (URL != NULL)
5146 xmlFree(URL);
5147}
5148#endif
5149
Owen Taylor3473f882001-02-23 17:55:21 +00005150/**
5151 * xmlParsePI:
5152 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005153 *
Owen Taylor3473f882001-02-23 17:55:21 +00005154 * parse an XML Processing Instruction.
5155 *
5156 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5157 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07005158 * The processing is transferred to SAX once parsed.
Owen Taylor3473f882001-02-23 17:55:21 +00005159 */
5160
5161void
5162xmlParsePI(xmlParserCtxtPtr ctxt) {
5163 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005164 size_t len = 0;
5165 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005166 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005167 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005168 xmlParserInputState state;
5169 int count = 0;
5170
5171 if ((RAW == '<') && (NXT(1) == '?')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005172 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005173 state = ctxt->instate;
5174 ctxt->instate = XML_PARSER_PI;
5175 /*
5176 * this is a Processing Instruction.
5177 */
5178 SKIP(2);
5179 SHRINK;
5180
5181 /*
5182 * Parse the target name and check for special support like
5183 * namespace.
5184 */
5185 target = xmlParsePITarget(ctxt);
5186 if (target != NULL) {
5187 if ((RAW == '?') && (NXT(1) == '>')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005188 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005189 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005190 "PI declaration doesn't start and stop in"
5191 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005192 }
5193 SKIP(2);
5194
5195 /*
5196 * SAX: PI detected.
5197 */
5198 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5199 (ctxt->sax->processingInstruction != NULL))
5200 ctxt->sax->processingInstruction(ctxt->userData,
5201 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005202 if (ctxt->instate != XML_PARSER_EOF)
5203 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005204 return;
5205 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005206 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005207 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005208 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005209 ctxt->instate = state;
5210 return;
5211 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005212 if (SKIP_BLANKS == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005213 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5214 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005215 }
Owen Taylor3473f882001-02-23 17:55:21 +00005216 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005217 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005218 ((cur != '?') || (NXT(1) != '>'))) {
5219 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005220 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005221 size_t new_size = size * 2;
5222 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005223 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005224 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005225 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005226 ctxt->instate = state;
5227 return;
5228 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005229 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005230 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005231 }
5232 count++;
5233 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08005234 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00005235 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005236 if (ctxt->instate == XML_PARSER_EOF) {
5237 xmlFree(buf);
5238 return;
5239 }
Owen Taylor3473f882001-02-23 17:55:21 +00005240 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005241 if ((len > XML_MAX_TEXT_LENGTH) &&
5242 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5243 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5244 "PI %s too big found", target);
5245 xmlFree(buf);
5246 ctxt->instate = state;
5247 return;
5248 }
Owen Taylor3473f882001-02-23 17:55:21 +00005249 }
5250 COPY_BUF(l,buf,len,cur);
5251 NEXTL(l);
5252 cur = CUR_CHAR(l);
5253 if (cur == 0) {
5254 SHRINK;
5255 GROW;
5256 cur = CUR_CHAR(l);
5257 }
5258 }
Daniel Veillard51304812012-07-19 20:34:26 +08005259 if ((len > XML_MAX_TEXT_LENGTH) &&
5260 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5261 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5262 "PI %s too big found", target);
5263 xmlFree(buf);
5264 ctxt->instate = state;
5265 return;
5266 }
Owen Taylor3473f882001-02-23 17:55:21 +00005267 buf[len] = 0;
5268 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005269 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5270 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005271 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005272 if (inputid != ctxt->input->id) {
5273 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5274 "PI declaration doesn't start and stop in"
5275 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005276 }
5277 SKIP(2);
5278
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005279#ifdef LIBXML_CATALOG_ENABLED
5280 if (((state == XML_PARSER_MISC) ||
5281 (state == XML_PARSER_START)) &&
5282 (xmlStrEqual(target, XML_CATALOG_PI))) {
5283 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5284 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5285 (allow == XML_CATA_ALLOW_ALL))
5286 xmlParseCatalogPI(ctxt, buf);
5287 }
5288#endif
5289
5290
Owen Taylor3473f882001-02-23 17:55:21 +00005291 /*
5292 * SAX: PI detected.
5293 */
5294 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5295 (ctxt->sax->processingInstruction != NULL))
5296 ctxt->sax->processingInstruction(ctxt->userData,
5297 target, buf);
5298 }
5299 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005300 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005301 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005302 }
Chris Evans77404b82011-12-14 16:18:25 +08005303 if (ctxt->instate != XML_PARSER_EOF)
5304 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005305 }
5306}
5307
5308/**
5309 * xmlParseNotationDecl:
5310 * @ctxt: an XML parser context
5311 *
5312 * parse a notation declaration
5313 *
5314 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5315 *
5316 * Hence there is actually 3 choices:
5317 * 'PUBLIC' S PubidLiteral
5318 * 'PUBLIC' S PubidLiteral S SystemLiteral
5319 * and 'SYSTEM' S SystemLiteral
5320 *
5321 * See the NOTE on xmlParseExternalID().
5322 */
5323
5324void
5325xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005326 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005327 xmlChar *Pubid;
5328 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005329
Daniel Veillarda07050d2003-10-19 14:46:32 +00005330 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005331 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005332 SHRINK;
5333 SKIP(10);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005334 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005335 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5336 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005337 return;
5338 }
Owen Taylor3473f882001-02-23 17:55:21 +00005339
Daniel Veillard76d66f42001-05-16 21:05:17 +00005340 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005341 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005342 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005343 return;
5344 }
Daniel Veillard37334572008-07-31 08:20:02 +00005345 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005346 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005347 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005348 name, NULL, NULL);
5349 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005350 if (SKIP_BLANKS == 0) {
5351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5352 "Space required after the NOTATION name'\n");
5353 return;
5354 }
Owen Taylor3473f882001-02-23 17:55:21 +00005355
5356 /*
5357 * Parse the IDs.
5358 */
5359 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5360 SKIP_BLANKS;
5361
5362 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005363 if (inputid != ctxt->input->id) {
5364 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5365 "Notation declaration doesn't start and stop"
5366 " in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005367 }
5368 NEXT;
5369 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5370 (ctxt->sax->notationDecl != NULL))
5371 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5372 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005373 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005374 }
Owen Taylor3473f882001-02-23 17:55:21 +00005375 if (Systemid != NULL) xmlFree(Systemid);
5376 if (Pubid != NULL) xmlFree(Pubid);
5377 }
5378}
5379
5380/**
5381 * xmlParseEntityDecl:
5382 * @ctxt: an XML parser context
5383 *
5384 * parse <!ENTITY declarations
5385 *
5386 * [70] EntityDecl ::= GEDecl | PEDecl
5387 *
5388 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5389 *
5390 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5391 *
5392 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5393 *
5394 * [74] PEDef ::= EntityValue | ExternalID
5395 *
5396 * [76] NDataDecl ::= S 'NDATA' S Name
5397 *
5398 * [ VC: Notation Declared ]
5399 * The Name must match the declared name of a notation.
5400 */
5401
5402void
5403xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005404 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005405 xmlChar *value = NULL;
5406 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005407 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005408 int isParameter = 0;
5409 xmlChar *orig = NULL;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005410
Daniel Veillard4c778d82005-01-23 17:37:44 +00005411 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005412 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005413 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005414 SHRINK;
5415 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005416 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005417 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5418 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005419 }
Owen Taylor3473f882001-02-23 17:55:21 +00005420
5421 if (RAW == '%') {
5422 NEXT;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005423 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005424 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
David Kilzer4472c3a2016-05-13 15:13:17 +08005425 "Space required after '%%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005426 }
Owen Taylor3473f882001-02-23 17:55:21 +00005427 isParameter = 1;
5428 }
5429
Daniel Veillard76d66f42001-05-16 21:05:17 +00005430 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005431 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005432 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5433 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005434 return;
5435 }
Daniel Veillard37334572008-07-31 08:20:02 +00005436 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005437 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005438 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005439 name, NULL, NULL);
5440 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005441 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005442 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5443 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005444 }
Owen Taylor3473f882001-02-23 17:55:21 +00005445
Daniel Veillardf5582f12002-06-11 10:08:16 +00005446 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005447 /*
5448 * handle the various case of definitions...
5449 */
5450 if (isParameter) {
5451 if ((RAW == '"') || (RAW == '\'')) {
5452 value = xmlParseEntityValue(ctxt, &orig);
5453 if (value) {
5454 if ((ctxt->sax != NULL) &&
5455 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5456 ctxt->sax->entityDecl(ctxt->userData, name,
5457 XML_INTERNAL_PARAMETER_ENTITY,
5458 NULL, NULL, value);
5459 }
5460 } else {
5461 URI = xmlParseExternalID(ctxt, &literal, 1);
5462 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005463 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005464 }
5465 if (URI) {
5466 xmlURIPtr uri;
5467
5468 uri = xmlParseURI((const char *) URI);
5469 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005470 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5471 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005472 /*
5473 * This really ought to be a well formedness error
5474 * but the XML Core WG decided otherwise c.f. issue
5475 * E26 of the XML erratas.
5476 */
Owen Taylor3473f882001-02-23 17:55:21 +00005477 } else {
5478 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005479 /*
5480 * Okay this is foolish to block those but not
5481 * invalid URIs.
5482 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005483 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005484 } else {
5485 if ((ctxt->sax != NULL) &&
5486 (!ctxt->disableSAX) &&
5487 (ctxt->sax->entityDecl != NULL))
5488 ctxt->sax->entityDecl(ctxt->userData, name,
5489 XML_EXTERNAL_PARAMETER_ENTITY,
5490 literal, URI, NULL);
5491 }
5492 xmlFreeURI(uri);
5493 }
5494 }
5495 }
5496 } else {
5497 if ((RAW == '"') || (RAW == '\'')) {
5498 value = xmlParseEntityValue(ctxt, &orig);
5499 if ((ctxt->sax != NULL) &&
5500 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5501 ctxt->sax->entityDecl(ctxt->userData, name,
5502 XML_INTERNAL_GENERAL_ENTITY,
5503 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005504 /*
5505 * For expat compatibility in SAX mode.
5506 */
5507 if ((ctxt->myDoc == NULL) ||
5508 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5509 if (ctxt->myDoc == NULL) {
5510 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005511 if (ctxt->myDoc == NULL) {
5512 xmlErrMemory(ctxt, "New Doc failed");
5513 return;
5514 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005515 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005516 }
5517 if (ctxt->myDoc->intSubset == NULL)
5518 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5519 BAD_CAST "fake", NULL, NULL);
5520
Daniel Veillard1af9a412003-08-20 22:54:39 +00005521 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5522 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005523 }
Owen Taylor3473f882001-02-23 17:55:21 +00005524 } else {
5525 URI = xmlParseExternalID(ctxt, &literal, 1);
5526 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005527 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005528 }
5529 if (URI) {
5530 xmlURIPtr uri;
5531
5532 uri = xmlParseURI((const char *)URI);
5533 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005534 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5535 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005536 /*
5537 * This really ought to be a well formedness error
5538 * but the XML Core WG decided otherwise c.f. issue
5539 * E26 of the XML erratas.
5540 */
Owen Taylor3473f882001-02-23 17:55:21 +00005541 } else {
5542 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005543 /*
5544 * Okay this is foolish to block those but not
5545 * invalid URIs.
5546 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005547 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005548 }
5549 xmlFreeURI(uri);
5550 }
5551 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005552 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005553 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5554 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005555 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005556 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005557 SKIP(5);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005558 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5560 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005561 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005562 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005563 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5564 (ctxt->sax->unparsedEntityDecl != NULL))
5565 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5566 literal, URI, ndata);
5567 } else {
5568 if ((ctxt->sax != NULL) &&
5569 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5570 ctxt->sax->entityDecl(ctxt->userData, name,
5571 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5572 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005573 /*
5574 * For expat compatibility in SAX mode.
Haibo Huangcfd91dc2020-07-30 23:01:33 -07005575 * assuming the entity replacement was asked for
Daniel Veillard5997aca2002-03-18 18:36:20 +00005576 */
5577 if ((ctxt->replaceEntities != 0) &&
5578 ((ctxt->myDoc == NULL) ||
5579 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5580 if (ctxt->myDoc == NULL) {
5581 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005582 if (ctxt->myDoc == NULL) {
5583 xmlErrMemory(ctxt, "New Doc failed");
5584 return;
5585 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005586 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005587 }
5588
5589 if (ctxt->myDoc->intSubset == NULL)
5590 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5591 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005592 xmlSAX2EntityDecl(ctxt, name,
5593 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5594 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005595 }
Owen Taylor3473f882001-02-23 17:55:21 +00005596 }
5597 }
5598 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005599 if (ctxt->instate == XML_PARSER_EOF)
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005600 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00005601 SKIP_BLANKS;
5602 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005603 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005604 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005605 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005606 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005607 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005608 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005609 "Entity declaration doesn't start and stop in"
5610 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005611 }
5612 NEXT;
5613 }
5614 if (orig != NULL) {
5615 /*
5616 * Ugly mechanism to save the raw entity value.
5617 */
5618 xmlEntityPtr cur = NULL;
5619
5620 if (isParameter) {
5621 if ((ctxt->sax != NULL) &&
5622 (ctxt->sax->getParameterEntity != NULL))
5623 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5624 } else {
5625 if ((ctxt->sax != NULL) &&
5626 (ctxt->sax->getEntity != NULL))
5627 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005628 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005629 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005630 }
Owen Taylor3473f882001-02-23 17:55:21 +00005631 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005632 if ((cur != NULL) && (cur->orig == NULL)) {
5633 cur->orig = orig;
5634 orig = NULL;
5635 }
Owen Taylor3473f882001-02-23 17:55:21 +00005636 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005637
5638done:
Owen Taylor3473f882001-02-23 17:55:21 +00005639 if (value != NULL) xmlFree(value);
5640 if (URI != NULL) xmlFree(URI);
5641 if (literal != NULL) xmlFree(literal);
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005642 if (orig != NULL) xmlFree(orig);
Owen Taylor3473f882001-02-23 17:55:21 +00005643 }
5644}
5645
5646/**
5647 * xmlParseDefaultDecl:
5648 * @ctxt: an XML parser context
5649 * @value: Receive a possible fixed default value for the attribute
5650 *
5651 * Parse an attribute default declaration
5652 *
5653 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5654 *
5655 * [ VC: Required Attribute ]
5656 * if the default declaration is the keyword #REQUIRED, then the
5657 * attribute must be specified for all elements of the type in the
5658 * attribute-list declaration.
5659 *
5660 * [ VC: Attribute Default Legal ]
5661 * The declared default value must meet the lexical constraints of
5662 * the declared attribute type c.f. xmlValidateAttributeDecl()
5663 *
5664 * [ VC: Fixed Attribute Default ]
5665 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005666 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005667 *
5668 * [ WFC: No < in Attribute Values ]
5669 * handled in xmlParseAttValue()
5670 *
5671 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005672 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005673 */
5674
5675int
5676xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5677 int val;
5678 xmlChar *ret;
5679
5680 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005681 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005682 SKIP(9);
5683 return(XML_ATTRIBUTE_REQUIRED);
5684 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005685 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005686 SKIP(8);
5687 return(XML_ATTRIBUTE_IMPLIED);
5688 }
5689 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005690 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005691 SKIP(6);
5692 val = XML_ATTRIBUTE_FIXED;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005693 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005694 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5695 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005696 }
Owen Taylor3473f882001-02-23 17:55:21 +00005697 }
5698 ret = xmlParseAttValue(ctxt);
5699 ctxt->instate = XML_PARSER_DTD;
5700 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005701 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005702 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005703 } else
5704 *value = ret;
5705 return(val);
5706}
5707
5708/**
5709 * xmlParseNotationType:
5710 * @ctxt: an XML parser context
5711 *
5712 * parse an Notation attribute type.
5713 *
5714 * Note: the leading 'NOTATION' S part has already being parsed...
5715 *
5716 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5717 *
5718 * [ VC: Notation Attributes ]
5719 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005720 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005721 *
5722 * Returns: the notation attribute tree built while parsing
5723 */
5724
5725xmlEnumerationPtr
5726xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005727 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005728 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005729
5730 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005731 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005732 return(NULL);
5733 }
5734 SHRINK;
5735 do {
5736 NEXT;
5737 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005738 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005739 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005740 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5741 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005742 xmlFreeEnumeration(ret);
5743 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005744 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005745 tmp = ret;
5746 while (tmp != NULL) {
5747 if (xmlStrEqual(name, tmp->name)) {
5748 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5749 "standalone: attribute notation value token %s duplicated\n",
5750 name, NULL);
5751 if (!xmlDictOwns(ctxt->dict, name))
5752 xmlFree((xmlChar *) name);
5753 break;
5754 }
5755 tmp = tmp->next;
5756 }
5757 if (tmp == NULL) {
5758 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005759 if (cur == NULL) {
5760 xmlFreeEnumeration(ret);
5761 return(NULL);
5762 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005763 if (last == NULL) ret = last = cur;
5764 else {
5765 last->next = cur;
5766 last = cur;
5767 }
Owen Taylor3473f882001-02-23 17:55:21 +00005768 }
5769 SKIP_BLANKS;
5770 } while (RAW == '|');
5771 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005772 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005773 xmlFreeEnumeration(ret);
5774 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005775 }
5776 NEXT;
5777 return(ret);
5778}
5779
5780/**
5781 * xmlParseEnumerationType:
5782 * @ctxt: an XML parser context
5783 *
5784 * parse an Enumeration attribute type.
5785 *
5786 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5787 *
5788 * [ VC: Enumeration ]
5789 * Values of this type must match one of the Nmtoken tokens in
5790 * the declaration
5791 *
5792 * Returns: the enumeration attribute tree built while parsing
5793 */
5794
5795xmlEnumerationPtr
5796xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5797 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005798 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005799
5800 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005801 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005802 return(NULL);
5803 }
5804 SHRINK;
5805 do {
5806 NEXT;
5807 SKIP_BLANKS;
5808 name = xmlParseNmtoken(ctxt);
5809 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005810 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005811 return(ret);
5812 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005813 tmp = ret;
5814 while (tmp != NULL) {
5815 if (xmlStrEqual(name, tmp->name)) {
5816 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5817 "standalone: attribute enumeration value token %s duplicated\n",
5818 name, NULL);
5819 if (!xmlDictOwns(ctxt->dict, name))
5820 xmlFree(name);
5821 break;
5822 }
5823 tmp = tmp->next;
5824 }
5825 if (tmp == NULL) {
5826 cur = xmlCreateEnumeration(name);
5827 if (!xmlDictOwns(ctxt->dict, name))
5828 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005829 if (cur == NULL) {
5830 xmlFreeEnumeration(ret);
5831 return(NULL);
5832 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005833 if (last == NULL) ret = last = cur;
5834 else {
5835 last->next = cur;
5836 last = cur;
5837 }
Owen Taylor3473f882001-02-23 17:55:21 +00005838 }
5839 SKIP_BLANKS;
5840 } while (RAW == '|');
5841 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005842 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005843 return(ret);
5844 }
5845 NEXT;
5846 return(ret);
5847}
5848
5849/**
5850 * xmlParseEnumeratedType:
5851 * @ctxt: an XML parser context
5852 * @tree: the enumeration tree built while parsing
5853 *
5854 * parse an Enumerated attribute type.
5855 *
5856 * [57] EnumeratedType ::= NotationType | Enumeration
5857 *
5858 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5859 *
5860 *
5861 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5862 */
5863
5864int
5865xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005866 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005867 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005868 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005869 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5870 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005871 return(0);
5872 }
Owen Taylor3473f882001-02-23 17:55:21 +00005873 *tree = xmlParseNotationType(ctxt);
5874 if (*tree == NULL) return(0);
5875 return(XML_ATTRIBUTE_NOTATION);
5876 }
5877 *tree = xmlParseEnumerationType(ctxt);
5878 if (*tree == NULL) return(0);
5879 return(XML_ATTRIBUTE_ENUMERATION);
5880}
5881
5882/**
5883 * xmlParseAttributeType:
5884 * @ctxt: an XML parser context
5885 * @tree: the enumeration tree built while parsing
5886 *
5887 * parse the Attribute list def for an element
5888 *
5889 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5890 *
5891 * [55] StringType ::= 'CDATA'
5892 *
5893 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5894 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5895 *
5896 * Validity constraints for attribute values syntax are checked in
5897 * xmlValidateAttributeValue()
5898 *
5899 * [ VC: ID ]
5900 * Values of type ID must match the Name production. A name must not
5901 * appear more than once in an XML document as a value of this type;
5902 * i.e., ID values must uniquely identify the elements which bear them.
5903 *
5904 * [ VC: One ID per Element Type ]
5905 * No element type may have more than one ID attribute specified.
5906 *
5907 * [ VC: ID Attribute Default ]
5908 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5909 *
5910 * [ VC: IDREF ]
5911 * Values of type IDREF must match the Name production, and values
5912 * of type IDREFS must match Names; each IDREF Name must match the value
5913 * of an ID attribute on some element in the XML document; i.e. IDREF
5914 * values must match the value of some ID attribute.
5915 *
5916 * [ VC: Entity Name ]
5917 * Values of type ENTITY must match the Name production, values
5918 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005919 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005920 *
5921 * [ VC: Name Token ]
5922 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005923 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005924 *
5925 * Returns the attribute type
5926 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005927int
Owen Taylor3473f882001-02-23 17:55:21 +00005928xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5929 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005930 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005931 SKIP(5);
5932 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005933 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005934 SKIP(6);
5935 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005936 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005937 SKIP(5);
5938 return(XML_ATTRIBUTE_IDREF);
5939 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5940 SKIP(2);
5941 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005942 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005943 SKIP(6);
5944 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005945 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005946 SKIP(8);
5947 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005948 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005949 SKIP(8);
5950 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005951 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005952 SKIP(7);
5953 return(XML_ATTRIBUTE_NMTOKEN);
5954 }
5955 return(xmlParseEnumeratedType(ctxt, tree));
5956}
5957
5958/**
5959 * xmlParseAttributeListDecl:
5960 * @ctxt: an XML parser context
5961 *
5962 * : parse the Attribute list def for an element
5963 *
5964 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5965 *
5966 * [53] AttDef ::= S Name S AttType S DefaultDecl
5967 *
5968 */
5969void
5970xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005971 const xmlChar *elemName;
5972 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005973 xmlEnumerationPtr tree;
5974
Daniel Veillarda07050d2003-10-19 14:46:32 +00005975 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005976 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005977
5978 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005979 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005980 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005981 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005982 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005983 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005984 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005985 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5986 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005987 return;
5988 }
5989 SKIP_BLANKS;
5990 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005991 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005992 int type;
5993 int def;
5994 xmlChar *defaultValue = NULL;
5995
5996 GROW;
5997 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005998 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005999 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006000 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6001 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006002 break;
6003 }
6004 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006005 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006006 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006007 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006008 break;
6009 }
Owen Taylor3473f882001-02-23 17:55:21 +00006010
6011 type = xmlParseAttributeType(ctxt, &tree);
6012 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006013 break;
6014 }
6015
6016 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006017 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006018 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6019 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006020 if (tree != NULL)
6021 xmlFreeEnumeration(tree);
6022 break;
6023 }
Owen Taylor3473f882001-02-23 17:55:21 +00006024
6025 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6026 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006027 if (defaultValue != NULL)
6028 xmlFree(defaultValue);
6029 if (tree != NULL)
6030 xmlFreeEnumeration(tree);
6031 break;
6032 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006033 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6034 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006035
6036 GROW;
6037 if (RAW != '>') {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006038 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006039 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006040 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006041 if (defaultValue != NULL)
6042 xmlFree(defaultValue);
6043 if (tree != NULL)
6044 xmlFreeEnumeration(tree);
6045 break;
6046 }
Owen Taylor3473f882001-02-23 17:55:21 +00006047 }
Owen Taylor3473f882001-02-23 17:55:21 +00006048 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6049 (ctxt->sax->attributeDecl != NULL))
6050 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6051 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006052 else if (tree != NULL)
6053 xmlFreeEnumeration(tree);
6054
6055 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006056 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006057 (def != XML_ATTRIBUTE_REQUIRED)) {
6058 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6059 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006060 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006061 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6062 }
Owen Taylor3473f882001-02-23 17:55:21 +00006063 if (defaultValue != NULL)
6064 xmlFree(defaultValue);
6065 GROW;
6066 }
6067 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006068 if (inputid != ctxt->input->id) {
6069 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6070 "Attribute list declaration doesn't start and"
6071 " stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006072 }
6073 NEXT;
6074 }
Owen Taylor3473f882001-02-23 17:55:21 +00006075 }
6076}
6077
6078/**
6079 * xmlParseElementMixedContentDecl:
6080 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006081 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006082 *
6083 * parse the declaration for a Mixed Element content
6084 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006085 *
Owen Taylor3473f882001-02-23 17:55:21 +00006086 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6087 * '(' S? '#PCDATA' S? ')'
6088 *
6089 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6090 *
6091 * [ VC: No Duplicate Types ]
6092 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006093 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006094 *
6095 * returns: the list of the xmlElementContentPtr describing the element choices
6096 */
6097xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006098xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006099 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006100 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006101
6102 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006103 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006104 SKIP(7);
6105 SKIP_BLANKS;
6106 SHRINK;
6107 if (RAW == ')') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006108 if (ctxt->input->id != inputchk) {
6109 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6110 "Element content declaration doesn't start and"
6111 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006112 }
Owen Taylor3473f882001-02-23 17:55:21 +00006113 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006114 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006115 if (ret == NULL)
6116 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006117 if (RAW == '*') {
6118 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6119 NEXT;
6120 }
6121 return(ret);
6122 }
6123 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006124 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006125 if (ret == NULL) return(NULL);
6126 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006127 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006128 NEXT;
6129 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006130 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Haibo Huangd75f3892021-01-05 21:34:50 -08006131 if (ret == NULL) {
6132 xmlFreeDocElementContent(ctxt->myDoc, cur);
6133 return(NULL);
6134 }
Owen Taylor3473f882001-02-23 17:55:21 +00006135 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006136 if (cur != NULL)
6137 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006138 cur = ret;
6139 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006140 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Haibo Huangd75f3892021-01-05 21:34:50 -08006141 if (n == NULL) {
6142 xmlFreeDocElementContent(ctxt->myDoc, ret);
6143 return(NULL);
6144 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006145 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006146 if (n->c1 != NULL)
6147 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006148 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006149 if (n != NULL)
6150 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006151 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006152 }
6153 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006154 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006155 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006156 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006157 "xmlParseElementMixedContentDecl : Name expected\n");
Nick Wellnhofer8627e4e2017-05-23 18:11:08 +02006158 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006159 return(NULL);
6160 }
6161 SKIP_BLANKS;
6162 GROW;
6163 }
6164 if ((RAW == ')') && (NXT(1) == '*')) {
6165 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006166 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006167 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006168 if (cur->c2 != NULL)
6169 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006170 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006171 if (ret != NULL)
6172 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006173 if (ctxt->input->id != inputchk) {
6174 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6175 "Element content declaration doesn't start and"
6176 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006177 }
Owen Taylor3473f882001-02-23 17:55:21 +00006178 SKIP(2);
6179 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006180 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006181 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006182 return(NULL);
6183 }
6184
6185 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006186 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006187 }
6188 return(ret);
6189}
6190
6191/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006192 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006193 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006194 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006195 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006196 *
6197 * parse the declaration for a Mixed Element content
6198 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006199 *
Owen Taylor3473f882001-02-23 17:55:21 +00006200 *
6201 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6202 *
6203 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6204 *
6205 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6206 *
6207 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6208 *
6209 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6210 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006211 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006212 * opening or closing parentheses in a choice, seq, or Mixed
6213 * construct is contained in the replacement text for a parameter
6214 * entity, both must be contained in the same replacement text. For
6215 * interoperability, if a parameter-entity reference appears in a
6216 * choice, seq, or Mixed construct, its replacement text should not
6217 * be empty, and neither the first nor last non-blank character of
6218 * the replacement text should be a connector (| or ,).
6219 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006220 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006221 * hierarchy.
6222 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006223static xmlElementContentPtr
6224xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6225 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006226 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006227 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006228 xmlChar type = 0;
6229
Daniel Veillard489f9672009-08-10 16:49:30 +02006230 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6231 (depth > 2048)) {
6232 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6233"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6234 depth);
6235 return(NULL);
6236 }
Owen Taylor3473f882001-02-23 17:55:21 +00006237 SKIP_BLANKS;
6238 GROW;
6239 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006240 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006241
Owen Taylor3473f882001-02-23 17:55:21 +00006242 /* Recurse on first child */
6243 NEXT;
6244 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006245 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6246 depth + 1);
Elliott Hughes5cefca72021-05-06 13:23:15 -07006247 if (cur == NULL)
6248 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006249 SKIP_BLANKS;
6250 GROW;
6251 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006252 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006253 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006254 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006255 return(NULL);
6256 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006257 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006258 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006259 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006260 return(NULL);
6261 }
Owen Taylor3473f882001-02-23 17:55:21 +00006262 GROW;
6263 if (RAW == '?') {
6264 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6265 NEXT;
6266 } else if (RAW == '*') {
6267 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6268 NEXT;
6269 } else if (RAW == '+') {
6270 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6271 NEXT;
6272 } else {
6273 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6274 }
Owen Taylor3473f882001-02-23 17:55:21 +00006275 GROW;
6276 }
6277 SKIP_BLANKS;
6278 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006279 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006280 /*
6281 * Each loop we parse one separator and one element.
6282 */
6283 if (RAW == ',') {
6284 if (type == 0) type = CUR;
6285
6286 /*
6287 * Detect "Name | Name , Name" error
6288 */
6289 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006290 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006291 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006292 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006293 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006294 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006295 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006296 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006297 return(NULL);
6298 }
6299 NEXT;
6300
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006301 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006302 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006303 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006304 xmlFreeDocElementContent(ctxt->myDoc, last);
6305 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006306 return(NULL);
6307 }
6308 if (last == NULL) {
6309 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006310 if (ret != NULL)
6311 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006312 ret = cur = op;
6313 } else {
6314 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006315 if (op != NULL)
6316 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006317 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006318 if (last != NULL)
6319 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006320 cur =op;
6321 last = NULL;
6322 }
6323 } else if (RAW == '|') {
6324 if (type == 0) type = CUR;
6325
6326 /*
6327 * Detect "Name , Name | Name" error
6328 */
6329 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006330 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006331 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006332 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006333 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006334 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006335 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006336 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006337 return(NULL);
6338 }
6339 NEXT;
6340
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006341 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006342 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006343 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006344 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006345 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006346 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006347 return(NULL);
6348 }
6349 if (last == NULL) {
6350 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006351 if (ret != NULL)
6352 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006353 ret = cur = op;
6354 } else {
6355 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006356 if (op != NULL)
6357 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006358 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006359 if (last != NULL)
6360 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006361 cur =op;
6362 last = NULL;
6363 }
6364 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006365 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006366 if ((last != NULL) && (last != ret))
6367 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006368 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006369 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006370 return(NULL);
6371 }
6372 GROW;
6373 SKIP_BLANKS;
6374 GROW;
6375 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006376 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006377 /* Recurse on second child */
6378 NEXT;
6379 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006380 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6381 depth + 1);
Elliott Hughes5cefca72021-05-06 13:23:15 -07006382 if (last == NULL) {
6383 if (ret != NULL)
6384 xmlFreeDocElementContent(ctxt->myDoc, ret);
6385 return(NULL);
6386 }
Owen Taylor3473f882001-02-23 17:55:21 +00006387 SKIP_BLANKS;
6388 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006389 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006390 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006391 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006392 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006393 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006394 return(NULL);
6395 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006396 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006397 if (last == NULL) {
6398 if (ret != NULL)
6399 xmlFreeDocElementContent(ctxt->myDoc, ret);
6400 return(NULL);
6401 }
Owen Taylor3473f882001-02-23 17:55:21 +00006402 if (RAW == '?') {
6403 last->ocur = XML_ELEMENT_CONTENT_OPT;
6404 NEXT;
6405 } else if (RAW == '*') {
6406 last->ocur = XML_ELEMENT_CONTENT_MULT;
6407 NEXT;
6408 } else if (RAW == '+') {
6409 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6410 NEXT;
6411 } else {
6412 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6413 }
6414 }
6415 SKIP_BLANKS;
6416 GROW;
6417 }
6418 if ((cur != NULL) && (last != NULL)) {
6419 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006420 if (last != NULL)
6421 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006422 }
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006423 if (ctxt->input->id != inputchk) {
6424 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6425 "Element content declaration doesn't start and stop in"
6426 " the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006427 }
Owen Taylor3473f882001-02-23 17:55:21 +00006428 NEXT;
6429 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006430 if (ret != NULL) {
6431 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6432 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6433 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6434 else
6435 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6436 }
Owen Taylor3473f882001-02-23 17:55:21 +00006437 NEXT;
6438 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006439 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006440 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006441 cur = ret;
6442 /*
6443 * Some normalization:
6444 * (a | b* | c?)* == (a | b | c)*
6445 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006446 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006447 if ((cur->c1 != NULL) &&
6448 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6449 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6450 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6451 if ((cur->c2 != NULL) &&
6452 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6453 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6454 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6455 cur = cur->c2;
6456 }
6457 }
Owen Taylor3473f882001-02-23 17:55:21 +00006458 NEXT;
6459 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006460 if (ret != NULL) {
6461 int found = 0;
6462
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006463 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6464 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6465 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006466 else
6467 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006468 /*
6469 * Some normalization:
6470 * (a | b*)+ == (a | b)*
6471 * (a | b?)+ == (a | b)*
6472 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006473 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006474 if ((cur->c1 != NULL) &&
6475 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6476 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6477 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6478 found = 1;
6479 }
6480 if ((cur->c2 != NULL) &&
6481 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6482 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6483 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6484 found = 1;
6485 }
6486 cur = cur->c2;
6487 }
6488 if (found)
6489 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6490 }
Owen Taylor3473f882001-02-23 17:55:21 +00006491 NEXT;
6492 }
6493 return(ret);
6494}
6495
6496/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006497 * xmlParseElementChildrenContentDecl:
6498 * @ctxt: an XML parser context
6499 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006500 *
6501 * parse the declaration for a Mixed Element content
6502 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6503 *
6504 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6505 *
6506 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6507 *
6508 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6509 *
6510 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6511 *
6512 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6513 * TODO Parameter-entity replacement text must be properly nested
6514 * with parenthesized groups. That is to say, if either of the
6515 * opening or closing parentheses in a choice, seq, or Mixed
6516 * construct is contained in the replacement text for a parameter
6517 * entity, both must be contained in the same replacement text. For
6518 * interoperability, if a parameter-entity reference appears in a
6519 * choice, seq, or Mixed construct, its replacement text should not
6520 * be empty, and neither the first nor last non-blank character of
6521 * the replacement text should be a connector (| or ,).
6522 *
6523 * Returns the tree of xmlElementContentPtr describing the element
6524 * hierarchy.
6525 */
6526xmlElementContentPtr
6527xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6528 /* stub left for API/ABI compat */
6529 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6530}
6531
6532/**
Owen Taylor3473f882001-02-23 17:55:21 +00006533 * xmlParseElementContentDecl:
6534 * @ctxt: an XML parser context
6535 * @name: the name of the element being defined.
6536 * @result: the Element Content pointer will be stored here if any
6537 *
6538 * parse the declaration for an Element content either Mixed or Children,
6539 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006540 *
Owen Taylor3473f882001-02-23 17:55:21 +00006541 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6542 *
6543 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6544 */
6545
6546int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006547xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006548 xmlElementContentPtr *result) {
6549
6550 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006551 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006552 int res;
6553
6554 *result = NULL;
6555
6556 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006557 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006558 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006559 return(-1);
6560 }
6561 NEXT;
6562 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006563 if (ctxt->instate == XML_PARSER_EOF)
6564 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006565 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006566 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006567 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006568 res = XML_ELEMENT_TYPE_MIXED;
6569 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006570 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006571 res = XML_ELEMENT_TYPE_ELEMENT;
6572 }
Owen Taylor3473f882001-02-23 17:55:21 +00006573 SKIP_BLANKS;
6574 *result = tree;
6575 return(res);
6576}
6577
6578/**
6579 * xmlParseElementDecl:
6580 * @ctxt: an XML parser context
6581 *
6582 * parse an Element declaration.
6583 *
6584 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6585 *
6586 * [ VC: Unique Element Type Declaration ]
6587 * No element type may be declared more than once
6588 *
6589 * Returns the type of the element, or -1 in case of error
6590 */
6591int
6592xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006593 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006594 int ret = -1;
6595 xmlElementContentPtr content = NULL;
6596
Daniel Veillard4c778d82005-01-23 17:37:44 +00006597 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006598 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006599 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006600
6601 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006602 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006603 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6604 "Space required after 'ELEMENT'\n");
David Kilzer00906752016-01-26 16:57:03 -08006605 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006606 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00006607 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006608 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006609 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6610 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006611 return(-1);
6612 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006613 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006614 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6615 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006616 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00006617 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006618 SKIP(5);
6619 /*
6620 * Element must always be empty.
6621 */
6622 ret = XML_ELEMENT_TYPE_EMPTY;
6623 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6624 (NXT(2) == 'Y')) {
6625 SKIP(3);
6626 /*
6627 * Element is a generic container.
6628 */
6629 ret = XML_ELEMENT_TYPE_ANY;
6630 } else if (RAW == '(') {
6631 ret = xmlParseElementContentDecl(ctxt, name, &content);
6632 } else {
6633 /*
6634 * [ WFC: PEs in Internal Subset ] error handling.
6635 */
6636 if ((RAW == '%') && (ctxt->external == 0) &&
6637 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006638 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006639 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006640 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006641 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006642 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6643 }
Owen Taylor3473f882001-02-23 17:55:21 +00006644 return(-1);
6645 }
6646
6647 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006648
6649 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006650 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006651 if (content != NULL) {
6652 xmlFreeDocElementContent(ctxt->myDoc, content);
6653 }
Owen Taylor3473f882001-02-23 17:55:21 +00006654 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006655 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006656 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006657 "Element declaration doesn't start and stop in"
6658 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006659 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006660
Owen Taylor3473f882001-02-23 17:55:21 +00006661 NEXT;
6662 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006663 (ctxt->sax->elementDecl != NULL)) {
6664 if (content != NULL)
6665 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006666 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6667 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006668 if ((content != NULL) && (content->parent == NULL)) {
6669 /*
6670 * this is a trick: if xmlAddElementDecl is called,
6671 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006672 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006673 * interfaces or change the API/ABI
6674 */
6675 xmlFreeDocElementContent(ctxt->myDoc, content);
6676 }
6677 } else if (content != NULL) {
6678 xmlFreeDocElementContent(ctxt->myDoc, content);
6679 }
Owen Taylor3473f882001-02-23 17:55:21 +00006680 }
Owen Taylor3473f882001-02-23 17:55:21 +00006681 }
6682 return(ret);
6683}
6684
6685/**
Owen Taylor3473f882001-02-23 17:55:21 +00006686 * xmlParseConditionalSections
6687 * @ctxt: an XML parser context
6688 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006689 * [61] conditionalSect ::= includeSect | ignoreSect
6690 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006691 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6692 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6693 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6694 */
6695
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006696static void
Owen Taylor3473f882001-02-23 17:55:21 +00006697xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006698 int *inputIds = NULL;
6699 size_t inputIdsSize = 0;
6700 size_t depth = 0;
Daniel Veillard49d44052008-08-27 19:57:06 +00006701
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006702 while (ctxt->instate != XML_PARSER_EOF) {
6703 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6704 int id = ctxt->input->id;
6705
6706 SKIP(3);
6707 SKIP_BLANKS;
6708
6709 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6710 SKIP(7);
6711 SKIP_BLANKS;
6712 if (RAW != '[') {
6713 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6714 xmlHaltParser(ctxt);
6715 goto error;
6716 }
6717 if (ctxt->input->id != id) {
6718 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6719 "All markup of the conditional section is"
6720 " not in the same entity\n");
6721 }
6722 NEXT;
6723
6724 if (inputIdsSize <= depth) {
6725 int *tmp;
6726
6727 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6728 tmp = (int *) xmlRealloc(inputIds,
6729 inputIdsSize * sizeof(int));
6730 if (tmp == NULL) {
6731 xmlErrMemory(ctxt, NULL);
6732 goto error;
6733 }
6734 inputIds = tmp;
6735 }
6736 inputIds[depth] = id;
6737 depth++;
6738 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6739 int state;
6740 xmlParserInputState instate;
6741 size_t ignoreDepth = 0;
6742
6743 SKIP(6);
6744 SKIP_BLANKS;
6745 if (RAW != '[') {
6746 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6747 xmlHaltParser(ctxt);
6748 goto error;
6749 }
6750 if (ctxt->input->id != id) {
6751 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6752 "All markup of the conditional section is"
6753 " not in the same entity\n");
6754 }
6755 NEXT;
6756
6757 /*
6758 * Parse up to the end of the conditional section but disable
6759 * SAX event generating DTD building in the meantime
6760 */
6761 state = ctxt->disableSAX;
6762 instate = ctxt->instate;
6763 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6764 ctxt->instate = XML_PARSER_IGNORE;
6765
6766 while (RAW != 0) {
6767 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6768 SKIP(3);
6769 ignoreDepth++;
6770 /* Check for integer overflow */
6771 if (ignoreDepth == 0) {
6772 xmlErrMemory(ctxt, NULL);
6773 goto error;
6774 }
6775 } else if ((RAW == ']') && (NXT(1) == ']') &&
6776 (NXT(2) == '>')) {
6777 if (ignoreDepth == 0)
6778 break;
6779 SKIP(3);
6780 ignoreDepth--;
6781 } else {
6782 NEXT;
6783 }
6784 }
6785
6786 ctxt->disableSAX = state;
6787 ctxt->instate = instate;
6788
6789 if (RAW == 0) {
6790 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6791 goto error;
6792 }
6793 if (ctxt->input->id != id) {
6794 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6795 "All markup of the conditional section is"
6796 " not in the same entity\n");
6797 }
6798 SKIP(3);
6799 } else {
6800 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6801 xmlHaltParser(ctxt);
6802 goto error;
6803 }
6804 } else if ((depth > 0) &&
6805 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6806 depth--;
6807 if (ctxt->input->id != inputIds[depth]) {
6808 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6809 "All markup of the conditional section is not"
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006810 " in the same entity\n");
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006811 }
6812 SKIP(3);
6813 } else {
6814 const xmlChar *check = CUR_PTR;
6815 unsigned int cons = ctxt->input->consumed;
6816
6817 xmlParseMarkupDecl(ctxt);
6818
6819 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6820 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6821 xmlHaltParser(ctxt);
6822 goto error;
6823 }
6824 }
6825
6826 if (depth == 0)
6827 break;
Owen Taylor3473f882001-02-23 17:55:21 +00006828
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006829 SKIP_BLANKS;
6830 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006831 }
6832
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006833error:
6834 xmlFree(inputIds);
Owen Taylor3473f882001-02-23 17:55:21 +00006835}
6836
6837/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006838 * xmlParseMarkupDecl:
6839 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006840 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006841 * parse Markup declarations
6842 *
6843 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6844 * NotationDecl | PI | Comment
6845 *
6846 * [ VC: Proper Declaration/PE Nesting ]
6847 * Parameter-entity replacement text must be properly nested with
6848 * markup declarations. That is to say, if either the first character
6849 * or the last character of a markup declaration (markupdecl above) is
6850 * contained in the replacement text for a parameter-entity reference,
6851 * both must be contained in the same replacement text.
6852 *
6853 * [ WFC: PEs in Internal Subset ]
6854 * In the internal DTD subset, parameter-entity references can occur
6855 * only where markup declarations can occur, not within markup declarations.
6856 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006857 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006858 */
6859void
6860xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6861 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006862 if (CUR == '<') {
6863 if (NXT(1) == '!') {
6864 switch (NXT(2)) {
6865 case 'E':
6866 if (NXT(3) == 'L')
6867 xmlParseElementDecl(ctxt);
6868 else if (NXT(3) == 'N')
6869 xmlParseEntityDecl(ctxt);
6870 break;
6871 case 'A':
6872 xmlParseAttributeListDecl(ctxt);
6873 break;
6874 case 'N':
6875 xmlParseNotationDecl(ctxt);
6876 break;
6877 case '-':
6878 xmlParseComment(ctxt);
6879 break;
6880 default:
6881 /* there is an error but it will be detected later */
6882 break;
6883 }
6884 } else if (NXT(1) == '?') {
6885 xmlParsePI(ctxt);
6886 }
6887 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006888
6889 /*
6890 * detect requirement to exit there and act accordingly
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006891 * and avoid having instate overridden later on
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006892 */
6893 if (ctxt->instate == XML_PARSER_EOF)
6894 return;
6895
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006896 ctxt->instate = XML_PARSER_DTD;
6897}
6898
6899/**
6900 * xmlParseTextDecl:
6901 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006902 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006903 * parse an XML declaration header for external entities
6904 *
6905 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006906 */
6907
6908void
6909xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6910 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006911 const xmlChar *encoding;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006912 int oldstate;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006913
6914 /*
6915 * We know that '<?xml' is here.
6916 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006917 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006918 SKIP(5);
6919 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006920 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006921 return;
6922 }
6923
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006924 /* Avoid expansion of parameter entities when skipping blanks. */
6925 oldstate = ctxt->instate;
6926 ctxt->instate = XML_PARSER_START;
6927
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006928 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006929 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6930 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006931 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006932
6933 /*
6934 * We may have the VersionInfo here.
6935 */
6936 version = xmlParseVersionInfo(ctxt);
6937 if (version == NULL)
6938 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006939 else {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006940 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006941 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6942 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006943 }
6944 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006945 ctxt->input->version = version;
6946
6947 /*
6948 * We must have the encoding declaration
6949 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006950 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006951 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6952 /*
6953 * The XML REC instructs us to stop parsing right here
6954 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006955 ctxt->instate = oldstate;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006956 return;
6957 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006958 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6959 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6960 "Missing encoding in text declaration\n");
6961 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006962
6963 SKIP_BLANKS;
6964 if ((RAW == '?') && (NXT(1) == '>')) {
6965 SKIP(2);
6966 } else if (RAW == '>') {
6967 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006968 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006969 NEXT;
6970 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006971 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006972 MOVETO_ENDTAG(CUR_PTR);
6973 NEXT;
6974 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006975
6976 ctxt->instate = oldstate;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006977}
6978
6979/**
Owen Taylor3473f882001-02-23 17:55:21 +00006980 * xmlParseExternalSubset:
6981 * @ctxt: an XML parser context
6982 * @ExternalID: the external identifier
6983 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006984 *
Owen Taylor3473f882001-02-23 17:55:21 +00006985 * parse Markup declarations from an external subset
6986 *
6987 * [30] extSubset ::= textDecl? extSubsetDecl
6988 *
6989 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6990 */
6991void
6992xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6993 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006994 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006995 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006996
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006997 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006998 (ctxt->input->end - ctxt->input->cur >= 4)) {
6999 xmlChar start[4];
7000 xmlCharEncoding enc;
7001
7002 start[0] = RAW;
7003 start[1] = NXT(1);
7004 start[2] = NXT(2);
7005 start[3] = NXT(3);
7006 enc = xmlDetectCharEncoding(start, 4);
7007 if (enc != XML_CHAR_ENCODING_NONE)
7008 xmlSwitchEncoding(ctxt, enc);
7009 }
7010
Daniel Veillarda07050d2003-10-19 14:46:32 +00007011 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007012 xmlParseTextDecl(ctxt);
7013 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7014 /*
7015 * The XML REC instructs us to stop parsing right here
7016 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08007017 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007018 return;
7019 }
7020 }
7021 if (ctxt->myDoc == NULL) {
7022 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007023 if (ctxt->myDoc == NULL) {
7024 xmlErrMemory(ctxt, "New Doc failed");
7025 return;
7026 }
7027 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007028 }
7029 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7030 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7031
7032 ctxt->instate = XML_PARSER_DTD;
7033 ctxt->external = 1;
Nick Wellnhofer453dff12017-06-19 17:55:20 +02007034 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00007035 while (((RAW == '<') && (NXT(1) == '?')) ||
7036 ((RAW == '<') && (NXT(1) == '!')) ||
Nick Wellnhofer453dff12017-06-19 17:55:20 +02007037 (RAW == '%')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007038 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007039 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007040
7041 GROW;
7042 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7043 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007044 } else
7045 xmlParseMarkupDecl(ctxt);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02007046 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00007047
Daniel Veillardfdc91562002-07-01 21:52:03 +00007048 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007049 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007050 break;
7051 }
7052 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007053
Owen Taylor3473f882001-02-23 17:55:21 +00007054 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007055 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007056 }
7057
7058}
7059
7060/**
7061 * xmlParseReference:
7062 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007063 *
Owen Taylor3473f882001-02-23 17:55:21 +00007064 * parse and handle entity references in content, depending on the SAX
7065 * interface, this may end-up in a call to character() if this is a
7066 * CharRef, a predefined entity, if there is no reference() callback.
7067 * or if the parser was asked to switch to that mode.
7068 *
7069 * [67] Reference ::= EntityRef | CharRef
7070 */
7071void
7072xmlParseReference(xmlParserCtxtPtr ctxt) {
7073 xmlEntityPtr ent;
7074 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007075 int was_checked;
7076 xmlNodePtr list = NULL;
7077 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007078
Daniel Veillard0161e632008-08-28 15:36:32 +00007079
7080 if (RAW != '&')
7081 return;
7082
7083 /*
7084 * Simple case of a CharRef
7085 */
Owen Taylor3473f882001-02-23 17:55:21 +00007086 if (NXT(1) == '#') {
7087 int i = 0;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007088 xmlChar out[16];
Owen Taylor3473f882001-02-23 17:55:21 +00007089 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007090 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007091
Daniel Veillarddc171602008-03-26 17:41:38 +00007092 if (value == 0)
7093 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007094 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7095 /*
7096 * So we are using non-UTF-8 buffers
7097 * Check that the char fit on 8bits, if not
7098 * generate a CharRef.
7099 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007100 if (value <= 0xFF) {
7101 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007102 out[1] = 0;
7103 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7104 (!ctxt->disableSAX))
7105 ctxt->sax->characters(ctxt->userData, out, 1);
7106 } else {
7107 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007108 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007109 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007110 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007111 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7112 (!ctxt->disableSAX))
7113 ctxt->sax->reference(ctxt->userData, out);
7114 }
7115 } else {
7116 /*
7117 * Just encode the value in UTF-8
7118 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007119 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007120 out[i] = 0;
7121 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7122 (!ctxt->disableSAX))
7123 ctxt->sax->characters(ctxt->userData, out, i);
7124 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007125 return;
7126 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007127
Daniel Veillard0161e632008-08-28 15:36:32 +00007128 /*
7129 * We are seeing an entity reference
7130 */
7131 ent = xmlParseEntityRef(ctxt);
7132 if (ent == NULL) return;
7133 if (!ctxt->wellFormed)
7134 return;
7135 was_checked = ent->checked;
7136
7137 /* special case of predefined entities */
7138 if ((ent->name == NULL) ||
7139 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7140 val = ent->content;
7141 if (val == NULL) return;
7142 /*
7143 * inline the entity.
7144 */
7145 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7146 (!ctxt->disableSAX))
7147 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7148 return;
7149 }
7150
7151 /*
7152 * The first reference to the entity trigger a parsing phase
7153 * where the ent->children is filled with the result from
7154 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007155 * Note: external parsed entities will not be loaded, it is not
7156 * required for a non-validating parser, unless the parsing option
7157 * of validating, or substituting entities were given. Doing so is
7158 * far more secure as the parser will only process data coming from
7159 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007160 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007161 if (((ent->checked == 0) ||
7162 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007163 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7164 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007165 unsigned long oldnbent = ctxt->nbentities, diff;
Daniel Veillard0161e632008-08-28 15:36:32 +00007166
7167 /*
7168 * This is a bit hackish but this seems the best
7169 * way to make sure both SAX and DOM entity support
7170 * behaves okay.
7171 */
7172 void *user_data;
7173 if (ctxt->userData == ctxt)
7174 user_data = NULL;
7175 else
7176 user_data = ctxt->userData;
7177
7178 /*
7179 * Check that this entity is well formed
7180 * 4.3.2: An internal general parsed entity is well-formed
7181 * if its replacement text matches the production labeled
7182 * content.
7183 */
7184 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7185 ctxt->depth++;
7186 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7187 user_data, &list);
7188 ctxt->depth--;
7189
7190 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7191 ctxt->depth++;
7192 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7193 user_data, ctxt->depth, ent->URI,
7194 ent->ExternalID, &list);
7195 ctxt->depth--;
7196 } else {
7197 ret = XML_ERR_ENTITY_PE_INTERNAL;
7198 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7199 "invalid entity type found\n", NULL);
7200 }
7201
7202 /*
7203 * Store the number of entities needing parsing for this entity
7204 * content and do checkings
7205 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007206 diff = ctxt->nbentities - oldnbent + 1;
7207 if (diff > INT_MAX / 2)
7208 diff = INT_MAX / 2;
7209 ent->checked = diff * 2;
Daniel Veillardcff25462013-03-11 15:57:55 +08007210 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7211 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007212 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007213 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Haibo Huangd75f3892021-01-05 21:34:50 -08007214 xmlHaltParser(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007215 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007216 return;
7217 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007218 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007219 xmlFreeNodeList(list);
7220 return;
7221 }
Owen Taylor3473f882001-02-23 17:55:21 +00007222
Daniel Veillard0161e632008-08-28 15:36:32 +00007223 if ((ret == XML_ERR_OK) && (list != NULL)) {
7224 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7225 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7226 (ent->children == NULL)) {
7227 ent->children = list;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007228 /*
7229 * Prune it directly in the generated document
7230 * except for single text nodes.
7231 */
7232 if ((ctxt->replaceEntities == 0) ||
7233 (ctxt->parseMode == XML_PARSE_READER) ||
7234 ((list->type == XML_TEXT_NODE) &&
7235 (list->next == NULL))) {
7236 ent->owner = 1;
7237 while (list != NULL) {
7238 list->parent = (xmlNodePtr) ent;
7239 xmlSetTreeDoc(list, ent->doc);
7240 if (list->next == NULL)
7241 ent->last = list;
7242 list = list->next;
7243 }
7244 list = NULL;
7245 } else {
7246 ent->owner = 0;
7247 while (list != NULL) {
7248 list->parent = (xmlNodePtr) ctxt->node;
7249 list->doc = ctxt->myDoc;
7250 if (list->next == NULL)
7251 ent->last = list;
7252 list = list->next;
7253 }
7254 list = ent->children;
Daniel Veillard0161e632008-08-28 15:36:32 +00007255#ifdef LIBXML_LEGACY_ENABLED
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007256 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7257 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007258#endif /* LIBXML_LEGACY_ENABLED */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007259 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007260 } else {
7261 xmlFreeNodeList(list);
7262 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007263 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007264 } else if ((ret != XML_ERR_OK) &&
7265 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7266 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7267 "Entity '%s' failed to parse\n", ent->name);
Nick Wellnhofer60dded12018-01-22 15:04:58 +01007268 if (ent->content != NULL)
7269 ent->content[0] = 0;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007270 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007271 } else if (list != NULL) {
7272 xmlFreeNodeList(list);
7273 list = NULL;
7274 }
7275 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007276 ent->checked = 2;
David Kilzer3f0627a2017-06-16 21:30:42 +02007277
7278 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7279 was_checked = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007280 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007281 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007282 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007283
Daniel Veillard0161e632008-08-28 15:36:32 +00007284 /*
7285 * Now that the entity content has been gathered
7286 * provide it to the application, this can take different forms based
7287 * on the parsing modes.
7288 */
7289 if (ent->children == NULL) {
7290 /*
7291 * Probably running in SAX mode and the callbacks don't
7292 * build the entity content. So unless we already went
7293 * though parsing for first checking go though the entity
7294 * content to generate callbacks associated to the entity
7295 */
7296 if (was_checked != 0) {
7297 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007298 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007299 * This is a bit hackish but this seems the best
7300 * way to make sure both SAX and DOM entity support
7301 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007302 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007303 if (ctxt->userData == ctxt)
7304 user_data = NULL;
7305 else
7306 user_data = ctxt->userData;
7307
7308 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7309 ctxt->depth++;
7310 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7311 ent->content, user_data, NULL);
7312 ctxt->depth--;
7313 } else if (ent->etype ==
7314 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7315 ctxt->depth++;
7316 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7317 ctxt->sax, user_data, ctxt->depth,
7318 ent->URI, ent->ExternalID, NULL);
7319 ctxt->depth--;
7320 } else {
7321 ret = XML_ERR_ENTITY_PE_INTERNAL;
7322 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7323 "invalid entity type found\n", NULL);
7324 }
7325 if (ret == XML_ERR_ENTITY_LOOP) {
7326 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7327 return;
7328 }
7329 }
7330 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7331 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7332 /*
7333 * Entity reference callback comes second, it's somewhat
7334 * superfluous but a compatibility to historical behaviour
7335 */
7336 ctxt->sax->reference(ctxt->userData, ent->name);
7337 }
7338 return;
7339 }
7340
7341 /*
7342 * If we didn't get any children for the entity being built
7343 */
7344 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7345 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7346 /*
7347 * Create a node.
7348 */
7349 ctxt->sax->reference(ctxt->userData, ent->name);
7350 return;
7351 }
7352
7353 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7354 /*
7355 * There is a problem on the handling of _private for entities
7356 * (bug 155816): Should we copy the content of the field from
7357 * the entity (possibly overwriting some value set by the user
7358 * when a copy is created), should we leave it alone, or should
7359 * we try to take care of different situations? The problem
7360 * is exacerbated by the usage of this field by the xmlReader.
7361 * To fix this bug, we look at _private on the created node
7362 * and, if it's NULL, we copy in whatever was in the entity.
7363 * If it's not NULL we leave it alone. This is somewhat of a
7364 * hack - maybe we should have further tests to determine
7365 * what to do.
7366 */
7367 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7368 /*
7369 * Seems we are generating the DOM content, do
7370 * a simple tree copy for all references except the first
7371 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007372 */
7373 if (((list == NULL) && (ent->owner == 0)) ||
7374 (ctxt->parseMode == XML_PARSE_READER)) {
7375 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7376
7377 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007378 * We are copying here, make sure there is no abuse
7379 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007380 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007381 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7382 return;
7383
7384 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007385 * when operating on a reader, the entities definitions
7386 * are always owning the entities subtree.
7387 if (ctxt->parseMode == XML_PARSE_READER)
7388 ent->owner = 1;
7389 */
7390
7391 cur = ent->children;
7392 while (cur != NULL) {
7393 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7394 if (nw != NULL) {
7395 if (nw->_private == NULL)
7396 nw->_private = cur->_private;
7397 if (firstChild == NULL){
7398 firstChild = nw;
7399 }
7400 nw = xmlAddChild(ctxt->node, nw);
7401 }
7402 if (cur == ent->last) {
7403 /*
7404 * needed to detect some strange empty
7405 * node cases in the reader tests
7406 */
7407 if ((ctxt->parseMode == XML_PARSE_READER) &&
7408 (nw != NULL) &&
7409 (nw->type == XML_ELEMENT_NODE) &&
7410 (nw->children == NULL))
7411 nw->extra = 1;
7412
7413 break;
7414 }
7415 cur = cur->next;
7416 }
7417#ifdef LIBXML_LEGACY_ENABLED
7418 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7419 xmlAddEntityReference(ent, firstChild, nw);
7420#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007421 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007422 xmlNodePtr nw = NULL, cur, next, last,
7423 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007424
7425 /*
7426 * We are copying here, make sure there is no abuse
7427 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007428 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007429 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7430 return;
7431
Daniel Veillard0161e632008-08-28 15:36:32 +00007432 /*
7433 * Copy the entity child list and make it the new
7434 * entity child list. The goal is to make sure any
7435 * ID or REF referenced will be the one from the
7436 * document content and not the entity copy.
7437 */
7438 cur = ent->children;
7439 ent->children = NULL;
7440 last = ent->last;
7441 ent->last = NULL;
7442 while (cur != NULL) {
7443 next = cur->next;
7444 cur->next = NULL;
7445 cur->parent = NULL;
7446 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7447 if (nw != NULL) {
7448 if (nw->_private == NULL)
7449 nw->_private = cur->_private;
7450 if (firstChild == NULL){
7451 firstChild = cur;
7452 }
7453 xmlAddChild((xmlNodePtr) ent, nw);
7454 xmlAddChild(ctxt->node, cur);
7455 }
7456 if (cur == last)
7457 break;
7458 cur = next;
7459 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007460 if (ent->owner == 0)
7461 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007462#ifdef LIBXML_LEGACY_ENABLED
7463 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7464 xmlAddEntityReference(ent, firstChild, nw);
7465#endif /* LIBXML_LEGACY_ENABLED */
7466 } else {
7467 const xmlChar *nbktext;
7468
7469 /*
7470 * the name change is to avoid coalescing of the
7471 * node with a possible previous text one which
7472 * would make ent->children a dangling pointer
7473 */
7474 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7475 -1);
7476 if (ent->children->type == XML_TEXT_NODE)
7477 ent->children->name = nbktext;
7478 if ((ent->last != ent->children) &&
7479 (ent->last->type == XML_TEXT_NODE))
7480 ent->last->name = nbktext;
7481 xmlAddChildList(ctxt->node, ent->children);
7482 }
7483
7484 /*
7485 * This is to avoid a nasty side effect, see
7486 * characters() in SAX.c
7487 */
7488 ctxt->nodemem = 0;
7489 ctxt->nodelen = 0;
7490 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007491 }
7492 }
7493}
7494
7495/**
7496 * xmlParseEntityRef:
7497 * @ctxt: an XML parser context
7498 *
7499 * parse ENTITY references declarations
7500 *
7501 * [68] EntityRef ::= '&' Name ';'
7502 *
7503 * [ WFC: Entity Declared ]
7504 * In a document without any DTD, a document with only an internal DTD
7505 * subset which contains no parameter entity references, or a document
7506 * with "standalone='yes'", the Name given in the entity reference
7507 * must match that in an entity declaration, except that well-formed
7508 * documents need not declare any of the following entities: amp, lt,
7509 * gt, apos, quot. The declaration of a parameter entity must precede
7510 * any reference to it. Similarly, the declaration of a general entity
7511 * must precede any reference to it which appears in a default value in an
7512 * attribute-list declaration. Note that if entities are declared in the
7513 * external subset or in external parameter entities, a non-validating
7514 * processor is not obligated to read and process their declarations;
7515 * for such documents, the rule that an entity must be declared is a
7516 * well-formedness constraint only if standalone='yes'.
7517 *
7518 * [ WFC: Parsed Entity ]
7519 * An entity reference must not contain the name of an unparsed entity
7520 *
7521 * Returns the xmlEntityPtr if found, or NULL otherwise.
7522 */
7523xmlEntityPtr
7524xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007525 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007526 xmlEntityPtr ent = NULL;
7527
7528 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007529 if (ctxt->instate == XML_PARSER_EOF)
7530 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007531
Daniel Veillard0161e632008-08-28 15:36:32 +00007532 if (RAW != '&')
7533 return(NULL);
7534 NEXT;
7535 name = xmlParseName(ctxt);
7536 if (name == NULL) {
7537 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7538 "xmlParseEntityRef: no name\n");
7539 return(NULL);
7540 }
7541 if (RAW != ';') {
7542 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7543 return(NULL);
7544 }
7545 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007546
Daniel Veillard0161e632008-08-28 15:36:32 +00007547 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007548 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007549 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007550 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7551 ent = xmlGetPredefinedEntity(name);
7552 if (ent != NULL)
7553 return(ent);
7554 }
Owen Taylor3473f882001-02-23 17:55:21 +00007555
Daniel Veillard0161e632008-08-28 15:36:32 +00007556 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007557 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007558 */
7559 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007560
Daniel Veillard0161e632008-08-28 15:36:32 +00007561 /*
7562 * Ask first SAX for entity resolution, otherwise try the
7563 * entities which may have stored in the parser context.
7564 */
7565 if (ctxt->sax != NULL) {
7566 if (ctxt->sax->getEntity != NULL)
7567 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007568 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007569 (ctxt->options & XML_PARSE_OLDSAX))
7570 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007571 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7572 (ctxt->userData==ctxt)) {
7573 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007574 }
7575 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007576 if (ctxt->instate == XML_PARSER_EOF)
7577 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007578 /*
7579 * [ WFC: Entity Declared ]
7580 * In a document without any DTD, a document with only an
7581 * internal DTD subset which contains no parameter entity
7582 * references, or a document with "standalone='yes'", the
7583 * Name given in the entity reference must match that in an
7584 * entity declaration, except that well-formed documents
7585 * need not declare any of the following entities: amp, lt,
7586 * gt, apos, quot.
7587 * The declaration of a parameter entity must precede any
7588 * reference to it.
7589 * Similarly, the declaration of a general entity must
7590 * precede any reference to it which appears in a default
7591 * value in an attribute-list declaration. Note that if
7592 * entities are declared in the external subset or in
7593 * external parameter entities, a non-validating processor
7594 * is not obligated to read and process their declarations;
7595 * for such documents, the rule that an entity must be
7596 * declared is a well-formedness constraint only if
7597 * standalone='yes'.
7598 */
7599 if (ent == NULL) {
7600 if ((ctxt->standalone == 1) ||
7601 ((ctxt->hasExternalSubset == 0) &&
7602 (ctxt->hasPErefs == 0))) {
7603 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7604 "Entity '%s' not defined\n", name);
7605 } else {
7606 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7607 "Entity '%s' not defined\n", name);
7608 if ((ctxt->inSubset == 0) &&
7609 (ctxt->sax != NULL) &&
7610 (ctxt->sax->reference != NULL)) {
7611 ctxt->sax->reference(ctxt->userData, name);
7612 }
7613 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007614 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007615 ctxt->valid = 0;
7616 }
7617
7618 /*
7619 * [ WFC: Parsed Entity ]
7620 * An entity reference must not contain the name of an
7621 * unparsed entity
7622 */
7623 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7624 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7625 "Entity reference to unparsed entity %s\n", name);
7626 }
7627
7628 /*
7629 * [ WFC: No External Entity References ]
7630 * Attribute values cannot contain direct or indirect
7631 * entity references to external entities.
7632 */
7633 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7634 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7635 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7636 "Attribute references external entity '%s'\n", name);
7637 }
7638 /*
7639 * [ WFC: No < in Attribute Values ]
7640 * The replacement text of any entity referred to directly or
7641 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007642 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007643 */
7644 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007645 (ent != NULL) &&
7646 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007647 if (((ent->checked & 1) || (ent->checked == 0)) &&
7648 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007649 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7650 "'<' in entity '%s' is not allowed in attributes values\n", name);
7651 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007652 }
7653
7654 /*
7655 * Internal check, no parameter entities here ...
7656 */
7657 else {
7658 switch (ent->etype) {
7659 case XML_INTERNAL_PARAMETER_ENTITY:
7660 case XML_EXTERNAL_PARAMETER_ENTITY:
7661 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7662 "Attempt to reference the parameter entity '%s'\n",
7663 name);
7664 break;
7665 default:
7666 break;
7667 }
7668 }
7669
7670 /*
7671 * [ WFC: No Recursion ]
7672 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007673 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007674 * Done somewhere else
7675 */
Owen Taylor3473f882001-02-23 17:55:21 +00007676 return(ent);
7677}
7678
7679/**
7680 * xmlParseStringEntityRef:
7681 * @ctxt: an XML parser context
7682 * @str: a pointer to an index in the string
7683 *
7684 * parse ENTITY references declarations, but this version parses it from
7685 * a string value.
7686 *
7687 * [68] EntityRef ::= '&' Name ';'
7688 *
7689 * [ WFC: Entity Declared ]
7690 * In a document without any DTD, a document with only an internal DTD
7691 * subset which contains no parameter entity references, or a document
7692 * with "standalone='yes'", the Name given in the entity reference
7693 * must match that in an entity declaration, except that well-formed
7694 * documents need not declare any of the following entities: amp, lt,
7695 * gt, apos, quot. The declaration of a parameter entity must precede
7696 * any reference to it. Similarly, the declaration of a general entity
7697 * must precede any reference to it which appears in a default value in an
7698 * attribute-list declaration. Note that if entities are declared in the
7699 * external subset or in external parameter entities, a non-validating
7700 * processor is not obligated to read and process their declarations;
7701 * for such documents, the rule that an entity must be declared is a
7702 * well-formedness constraint only if standalone='yes'.
7703 *
7704 * [ WFC: Parsed Entity ]
7705 * An entity reference must not contain the name of an unparsed entity
7706 *
7707 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7708 * is updated to the current location in the string.
7709 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007710static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007711xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7712 xmlChar *name;
7713 const xmlChar *ptr;
7714 xmlChar cur;
7715 xmlEntityPtr ent = NULL;
7716
7717 if ((str == NULL) || (*str == NULL))
7718 return(NULL);
7719 ptr = *str;
7720 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007721 if (cur != '&')
7722 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007723
Daniel Veillard0161e632008-08-28 15:36:32 +00007724 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007725 name = xmlParseStringName(ctxt, &ptr);
7726 if (name == NULL) {
7727 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7728 "xmlParseStringEntityRef: no name\n");
7729 *str = ptr;
7730 return(NULL);
7731 }
7732 if (*ptr != ';') {
7733 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007734 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007735 *str = ptr;
7736 return(NULL);
7737 }
7738 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007739
Owen Taylor3473f882001-02-23 17:55:21 +00007740
Daniel Veillard0161e632008-08-28 15:36:32 +00007741 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007742 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007743 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007744 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7745 ent = xmlGetPredefinedEntity(name);
7746 if (ent != NULL) {
7747 xmlFree(name);
7748 *str = ptr;
7749 return(ent);
7750 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007751 }
Owen Taylor3473f882001-02-23 17:55:21 +00007752
Daniel Veillard0161e632008-08-28 15:36:32 +00007753 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007754 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007755 */
7756 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007757
Daniel Veillard0161e632008-08-28 15:36:32 +00007758 /*
7759 * Ask first SAX for entity resolution, otherwise try the
7760 * entities which may have stored in the parser context.
7761 */
7762 if (ctxt->sax != NULL) {
7763 if (ctxt->sax->getEntity != NULL)
7764 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007765 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7766 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007767 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7768 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007769 }
7770 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007771 if (ctxt->instate == XML_PARSER_EOF) {
7772 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007773 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007774 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007775
7776 /*
7777 * [ WFC: Entity Declared ]
7778 * In a document without any DTD, a document with only an
7779 * internal DTD subset which contains no parameter entity
7780 * references, or a document with "standalone='yes'", the
7781 * Name given in the entity reference must match that in an
7782 * entity declaration, except that well-formed documents
7783 * need not declare any of the following entities: amp, lt,
7784 * gt, apos, quot.
7785 * The declaration of a parameter entity must precede any
7786 * reference to it.
7787 * Similarly, the declaration of a general entity must
7788 * precede any reference to it which appears in a default
7789 * value in an attribute-list declaration. Note that if
7790 * entities are declared in the external subset or in
7791 * external parameter entities, a non-validating processor
7792 * is not obligated to read and process their declarations;
7793 * for such documents, the rule that an entity must be
7794 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007795 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007796 */
7797 if (ent == NULL) {
7798 if ((ctxt->standalone == 1) ||
7799 ((ctxt->hasExternalSubset == 0) &&
7800 (ctxt->hasPErefs == 0))) {
7801 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7802 "Entity '%s' not defined\n", name);
7803 } else {
7804 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7805 "Entity '%s' not defined\n",
7806 name);
7807 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007808 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007809 /* TODO ? check regressions ctxt->valid = 0; */
7810 }
7811
7812 /*
7813 * [ WFC: Parsed Entity ]
7814 * An entity reference must not contain the name of an
7815 * unparsed entity
7816 */
7817 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7818 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7819 "Entity reference to unparsed entity %s\n", name);
7820 }
7821
7822 /*
7823 * [ WFC: No External Entity References ]
7824 * Attribute values cannot contain direct or indirect
7825 * entity references to external entities.
7826 */
7827 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7828 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7829 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7830 "Attribute references external entity '%s'\n", name);
7831 }
7832 /*
7833 * [ WFC: No < in Attribute Values ]
7834 * The replacement text of any entity referred to directly or
7835 * indirectly in an attribute value (other than "&lt;") must
7836 * not contain a <.
7837 */
7838 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7839 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007840 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007841 (xmlStrchr(ent->content, '<'))) {
7842 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7843 "'<' in entity '%s' is not allowed in attributes values\n",
7844 name);
7845 }
7846
7847 /*
7848 * Internal check, no parameter entities here ...
7849 */
7850 else {
7851 switch (ent->etype) {
7852 case XML_INTERNAL_PARAMETER_ENTITY:
7853 case XML_EXTERNAL_PARAMETER_ENTITY:
7854 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7855 "Attempt to reference the parameter entity '%s'\n",
7856 name);
7857 break;
7858 default:
7859 break;
7860 }
7861 }
7862
7863 /*
7864 * [ WFC: No Recursion ]
7865 * A parsed entity must not contain a recursive reference
7866 * to itself, either directly or indirectly.
7867 * Done somewhere else
7868 */
7869
7870 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007871 *str = ptr;
7872 return(ent);
7873}
7874
7875/**
7876 * xmlParsePEReference:
7877 * @ctxt: an XML parser context
7878 *
7879 * parse PEReference declarations
7880 * The entity content is handled directly by pushing it's content as
7881 * a new input stream.
7882 *
7883 * [69] PEReference ::= '%' Name ';'
7884 *
7885 * [ WFC: No Recursion ]
7886 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007887 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007888 *
7889 * [ WFC: Entity Declared ]
7890 * In a document without any DTD, a document with only an internal DTD
7891 * subset which contains no parameter entity references, or a document
7892 * with "standalone='yes'", ... ... The declaration of a parameter
7893 * entity must precede any reference to it...
7894 *
7895 * [ VC: Entity Declared ]
7896 * In a document with an external subset or external parameter entities
7897 * with "standalone='no'", ... ... The declaration of a parameter entity
7898 * must precede any reference to it...
7899 *
7900 * [ WFC: In DTD ]
7901 * Parameter-entity references may only appear in the DTD.
7902 * NOTE: misleading but this is handled.
7903 */
7904void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007905xmlParsePEReference(xmlParserCtxtPtr ctxt)
7906{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007907 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007908 xmlEntityPtr entity = NULL;
7909 xmlParserInputPtr input;
7910
Daniel Veillard0161e632008-08-28 15:36:32 +00007911 if (RAW != '%')
7912 return;
7913 NEXT;
7914 name = xmlParseName(ctxt);
7915 if (name == NULL) {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007916 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
Daniel Veillard0161e632008-08-28 15:36:32 +00007917 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007918 }
Nick Wellnhofer03904152017-06-05 21:16:00 +02007919 if (xmlParserDebugEntities)
7920 xmlGenericError(xmlGenericErrorContext,
7921 "PEReference: %s\n", name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007922 if (RAW != ';') {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007923 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007924 return;
7925 }
7926
7927 NEXT;
7928
7929 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007930 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007931 */
7932 ctxt->nbentities++;
7933
7934 /*
7935 * Request the entity from SAX
7936 */
7937 if ((ctxt->sax != NULL) &&
7938 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007939 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7940 if (ctxt->instate == XML_PARSER_EOF)
7941 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007942 if (entity == NULL) {
7943 /*
7944 * [ WFC: Entity Declared ]
7945 * In a document without any DTD, a document with only an
7946 * internal DTD subset which contains no parameter entity
7947 * references, or a document with "standalone='yes'", ...
7948 * ... The declaration of a parameter entity must precede
7949 * any reference to it...
7950 */
7951 if ((ctxt->standalone == 1) ||
7952 ((ctxt->hasExternalSubset == 0) &&
7953 (ctxt->hasPErefs == 0))) {
7954 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7955 "PEReference: %%%s; not found\n",
7956 name);
7957 } else {
7958 /*
7959 * [ VC: Entity Declared ]
7960 * In a document with an external subset or external
7961 * parameter entities with "standalone='no'", ...
7962 * ... The declaration of a parameter entity must
7963 * precede any reference to it...
7964 */
Nick Wellnhofer03904152017-06-05 21:16:00 +02007965 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7966 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7967 "PEReference: %%%s; not found\n",
7968 name, NULL);
7969 } else
7970 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7971 "PEReference: %%%s; not found\n",
7972 name, NULL);
7973 ctxt->valid = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007974 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007975 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007976 } else {
7977 /*
7978 * Internal checking in case the entity quest barfed
7979 */
7980 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7981 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7982 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7983 "Internal: %%%s; is not a parameter entity\n",
7984 name, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007985 } else {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007986 xmlChar start[4];
7987 xmlCharEncoding enc;
7988
Elliott Hughese54f00d2021-05-13 08:13:46 -07007989 if (xmlParserEntityCheck(ctxt, 0, entity, 0))
7990 return;
7991
Neel Mehta90ccb582017-04-07 17:43:02 +02007992 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7993 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7994 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7995 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7996 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7997 (ctxt->replaceEntities == 0) &&
7998 (ctxt->validate == 0))
7999 return;
8000
Daniel Veillard0161e632008-08-28 15:36:32 +00008001 input = xmlNewEntityInputStream(ctxt, entity);
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02008002 if (xmlPushInput(ctxt, input) < 0) {
8003 xmlFreeInputStream(input);
Daniel Veillard0161e632008-08-28 15:36:32 +00008004 return;
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02008005 }
Nick Wellnhofer46dc9892017-06-08 02:24:56 +02008006
8007 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8008 /*
8009 * Get the 4 first bytes and decode the charset
8010 * if enc != XML_CHAR_ENCODING_NONE
8011 * plug some encoding conversion routines.
8012 * Note that, since we may have some non-UTF8
8013 * encoding (like UTF16, bug 135229), the 'length'
8014 * is not known, but we can calculate based upon
8015 * the amount of data in the buffer.
8016 */
8017 GROW
8018 if (ctxt->instate == XML_PARSER_EOF)
8019 return;
8020 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8021 start[0] = RAW;
8022 start[1] = NXT(1);
8023 start[2] = NXT(2);
8024 start[3] = NXT(3);
8025 enc = xmlDetectCharEncoding(start, 4);
8026 if (enc != XML_CHAR_ENCODING_NONE) {
8027 xmlSwitchEncoding(ctxt, enc);
8028 }
8029 }
8030
8031 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8032 (IS_BLANK_CH(NXT(5)))) {
8033 xmlParseTextDecl(ctxt);
Nick Wellnhofer03904152017-06-05 21:16:00 +02008034 }
8035 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008036 }
8037 }
8038 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008039}
8040
8041/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008042 * xmlLoadEntityContent:
8043 * @ctxt: an XML parser context
8044 * @entity: an unloaded system entity
8045 *
8046 * Load the original content of the given system entity from the
8047 * ExternalID/SystemID given. This is to be used for Included in Literal
8048 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8049 *
8050 * Returns 0 in case of success and -1 in case of failure
8051 */
8052static int
8053xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8054 xmlParserInputPtr input;
8055 xmlBufferPtr buf;
8056 int l, c;
8057 int count = 0;
8058
8059 if ((ctxt == NULL) || (entity == NULL) ||
8060 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8061 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8062 (entity->content != NULL)) {
8063 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8064 "xmlLoadEntityContent parameter error");
8065 return(-1);
8066 }
8067
8068 if (xmlParserDebugEntities)
8069 xmlGenericError(xmlGenericErrorContext,
8070 "Reading %s entity content input\n", entity->name);
8071
8072 buf = xmlBufferCreate();
8073 if (buf == NULL) {
8074 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8075 "xmlLoadEntityContent parameter error");
8076 return(-1);
8077 }
8078
8079 input = xmlNewEntityInputStream(ctxt, entity);
8080 if (input == NULL) {
8081 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8082 "xmlLoadEntityContent input error");
8083 xmlBufferFree(buf);
8084 return(-1);
8085 }
8086
8087 /*
8088 * Push the entity as the current input, read char by char
8089 * saving to the buffer until the end of the entity or an error
8090 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008091 if (xmlPushInput(ctxt, input) < 0) {
8092 xmlBufferFree(buf);
8093 return(-1);
8094 }
8095
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008096 GROW;
8097 c = CUR_CHAR(l);
8098 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8099 (IS_CHAR(c))) {
8100 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008101 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008102 count = 0;
8103 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008104 if (ctxt->instate == XML_PARSER_EOF) {
8105 xmlBufferFree(buf);
8106 return(-1);
8107 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008108 }
8109 NEXTL(l);
8110 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008111 if (c == 0) {
8112 count = 0;
8113 GROW;
8114 if (ctxt->instate == XML_PARSER_EOF) {
8115 xmlBufferFree(buf);
8116 return(-1);
8117 }
8118 c = CUR_CHAR(l);
8119 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008120 }
8121
8122 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8123 xmlPopInput(ctxt);
8124 } else if (!IS_CHAR(c)) {
8125 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8126 "xmlLoadEntityContent: invalid char value %d\n",
8127 c);
8128 xmlBufferFree(buf);
8129 return(-1);
8130 }
8131 entity->content = buf->content;
8132 buf->content = NULL;
8133 xmlBufferFree(buf);
8134
8135 return(0);
8136}
8137
8138/**
Owen Taylor3473f882001-02-23 17:55:21 +00008139 * xmlParseStringPEReference:
8140 * @ctxt: an XML parser context
8141 * @str: a pointer to an index in the string
8142 *
8143 * parse PEReference declarations
8144 *
8145 * [69] PEReference ::= '%' Name ';'
8146 *
8147 * [ WFC: No Recursion ]
8148 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008149 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008150 *
8151 * [ WFC: Entity Declared ]
8152 * In a document without any DTD, a document with only an internal DTD
8153 * subset which contains no parameter entity references, or a document
8154 * with "standalone='yes'", ... ... The declaration of a parameter
8155 * entity must precede any reference to it...
8156 *
8157 * [ VC: Entity Declared ]
8158 * In a document with an external subset or external parameter entities
8159 * with "standalone='no'", ... ... The declaration of a parameter entity
8160 * must precede any reference to it...
8161 *
8162 * [ WFC: In DTD ]
8163 * Parameter-entity references may only appear in the DTD.
8164 * NOTE: misleading but this is handled.
8165 *
8166 * Returns the string of the entity content.
8167 * str is updated to the current value of the index
8168 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008169static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008170xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8171 const xmlChar *ptr;
8172 xmlChar cur;
8173 xmlChar *name;
8174 xmlEntityPtr entity = NULL;
8175
8176 if ((str == NULL) || (*str == NULL)) return(NULL);
8177 ptr = *str;
8178 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008179 if (cur != '%')
8180 return(NULL);
8181 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008182 name = xmlParseStringName(ctxt, &ptr);
8183 if (name == NULL) {
8184 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8185 "xmlParseStringPEReference: no name\n");
8186 *str = ptr;
8187 return(NULL);
8188 }
8189 cur = *ptr;
8190 if (cur != ';') {
8191 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8192 xmlFree(name);
8193 *str = ptr;
8194 return(NULL);
8195 }
8196 ptr++;
8197
8198 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008199 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00008200 */
8201 ctxt->nbentities++;
8202
8203 /*
8204 * Request the entity from SAX
8205 */
8206 if ((ctxt->sax != NULL) &&
8207 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008208 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8209 if (ctxt->instate == XML_PARSER_EOF) {
8210 xmlFree(name);
Nick Wellnhoferfb2f5182017-06-10 17:06:16 +02008211 *str = ptr;
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008212 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008213 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008214 if (entity == NULL) {
8215 /*
8216 * [ WFC: Entity Declared ]
8217 * In a document without any DTD, a document with only an
8218 * internal DTD subset which contains no parameter entity
8219 * references, or a document with "standalone='yes'", ...
8220 * ... The declaration of a parameter entity must precede
8221 * any reference to it...
8222 */
8223 if ((ctxt->standalone == 1) ||
8224 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8225 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8226 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008227 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008228 /*
8229 * [ VC: Entity Declared ]
8230 * In a document with an external subset or external
8231 * parameter entities with "standalone='no'", ...
8232 * ... The declaration of a parameter entity must
8233 * precede any reference to it...
8234 */
8235 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8236 "PEReference: %%%s; not found\n",
8237 name, NULL);
8238 ctxt->valid = 0;
8239 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008240 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008241 } else {
8242 /*
8243 * Internal checking in case the entity quest barfed
8244 */
8245 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8246 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8247 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8248 "%%%s; is not a parameter entity\n",
8249 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008250 }
8251 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008252 ctxt->hasPErefs = 1;
8253 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008254 *str = ptr;
8255 return(entity);
8256}
8257
8258/**
8259 * xmlParseDocTypeDecl:
8260 * @ctxt: an XML parser context
8261 *
8262 * parse a DOCTYPE declaration
8263 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008264 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008265 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8266 *
8267 * [ VC: Root Element Type ]
8268 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008269 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008270 */
8271
8272void
8273xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008274 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008275 xmlChar *ExternalID = NULL;
8276 xmlChar *URI = NULL;
8277
8278 /*
8279 * We know that '<!DOCTYPE' has been detected.
8280 */
8281 SKIP(9);
8282
8283 SKIP_BLANKS;
8284
8285 /*
8286 * Parse the DOCTYPE name.
8287 */
8288 name = xmlParseName(ctxt);
8289 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008290 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8291 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008292 }
8293 ctxt->intSubName = name;
8294
8295 SKIP_BLANKS;
8296
8297 /*
8298 * Check for SystemID and ExternalID
8299 */
8300 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8301
8302 if ((URI != NULL) || (ExternalID != NULL)) {
8303 ctxt->hasExternalSubset = 1;
8304 }
8305 ctxt->extSubURI = URI;
8306 ctxt->extSubSystem = ExternalID;
8307
8308 SKIP_BLANKS;
8309
8310 /*
8311 * Create and update the internal subset.
8312 */
8313 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8314 (!ctxt->disableSAX))
8315 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008316 if (ctxt->instate == XML_PARSER_EOF)
8317 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008318
8319 /*
8320 * Is there any internal subset declarations ?
8321 * they are handled separately in xmlParseInternalSubset()
8322 */
8323 if (RAW == '[')
8324 return;
8325
8326 /*
8327 * We should be at the end of the DOCTYPE declaration.
8328 */
8329 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008330 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008331 }
8332 NEXT;
8333}
8334
8335/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008336 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008337 * @ctxt: an XML parser context
8338 *
8339 * parse the internal subset declaration
8340 *
8341 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8342 */
8343
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008344static void
Owen Taylor3473f882001-02-23 17:55:21 +00008345xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8346 /*
8347 * Is there any DTD definition ?
8348 */
8349 if (RAW == '[') {
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008350 int baseInputNr = ctxt->inputNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008351 ctxt->instate = XML_PARSER_DTD;
8352 NEXT;
8353 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008354 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008355 * PEReferences.
8356 * Subsequence (markupdecl | PEReference | S)*
8357 */
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008358 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
Nick Wellnhofer453dff12017-06-19 17:55:20 +02008359 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008360 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008361 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008362
8363 SKIP_BLANKS;
8364 xmlParseMarkupDecl(ctxt);
8365 xmlParsePEReference(ctxt);
8366
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008367 /*
8368 * Conditional sections are allowed from external entities included
8369 * by PE References in the internal subset.
8370 */
8371 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8372 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8373 xmlParseConditionalSections(ctxt);
8374 }
8375
Owen Taylor3473f882001-02-23 17:55:21 +00008376 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008377 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008378 "xmlParseInternalSubset: error detected in Markup declaration\n");
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008379 if (ctxt->inputNr > baseInputNr)
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02008380 xmlPopInput(ctxt);
8381 else
8382 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008383 }
8384 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008385 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008386 NEXT;
8387 SKIP_BLANKS;
8388 }
8389 }
8390
8391 /*
8392 * We should be at the end of the DOCTYPE declaration.
8393 */
8394 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008395 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Daniel Veillarda7a94612016-02-09 12:55:29 +01008396 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008397 }
8398 NEXT;
8399}
8400
Daniel Veillard81273902003-09-30 00:43:48 +00008401#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008402/**
8403 * xmlParseAttribute:
8404 * @ctxt: an XML parser context
8405 * @value: a xmlChar ** used to store the value of the attribute
8406 *
8407 * parse an attribute
8408 *
8409 * [41] Attribute ::= Name Eq AttValue
8410 *
8411 * [ WFC: No External Entity References ]
8412 * Attribute values cannot contain direct or indirect entity references
8413 * to external entities.
8414 *
8415 * [ WFC: No < in Attribute Values ]
8416 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008417 * an attribute value (other than "&lt;") must not contain a <.
8418 *
Owen Taylor3473f882001-02-23 17:55:21 +00008419 * [ VC: Attribute Value Type ]
8420 * The attribute must have been declared; the value must be of the type
8421 * declared for it.
8422 *
8423 * [25] Eq ::= S? '=' S?
8424 *
8425 * With namespace:
8426 *
8427 * [NS 11] Attribute ::= QName Eq AttValue
8428 *
8429 * Also the case QName == xmlns:??? is handled independently as a namespace
8430 * definition.
8431 *
8432 * Returns the attribute name, and the value in *value.
8433 */
8434
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008435const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008436xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008437 const xmlChar *name;
8438 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008439
8440 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008441 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008442 name = xmlParseName(ctxt);
8443 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008444 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008445 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008446 return(NULL);
8447 }
8448
8449 /*
8450 * read the value
8451 */
8452 SKIP_BLANKS;
8453 if (RAW == '=') {
8454 NEXT;
8455 SKIP_BLANKS;
8456 val = xmlParseAttValue(ctxt);
8457 ctxt->instate = XML_PARSER_CONTENT;
8458 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008459 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02008460 "Specification mandates value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008461 return(NULL);
8462 }
8463
8464 /*
8465 * Check that xml:lang conforms to the specification
8466 * No more registered as an error, just generate a warning now
8467 * since this was deprecated in XML second edition
8468 */
8469 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8470 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008471 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8472 "Malformed value for xml:lang : %s\n",
8473 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008474 }
8475 }
8476
8477 /*
8478 * Check that xml:space conforms to the specification
8479 */
8480 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8481 if (xmlStrEqual(val, BAD_CAST "default"))
8482 *(ctxt->space) = 0;
8483 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8484 *(ctxt->space) = 1;
8485 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008486 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008487"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008488 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008489 }
8490 }
8491
8492 *value = val;
8493 return(name);
8494}
8495
8496/**
8497 * xmlParseStartTag:
8498 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008499 *
Owen Taylor3473f882001-02-23 17:55:21 +00008500 * parse a start of tag either for rule element or
8501 * EmptyElement. In both case we don't parse the tag closing chars.
8502 *
8503 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8504 *
8505 * [ WFC: Unique Att Spec ]
8506 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008507 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008508 *
8509 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8510 *
8511 * [ WFC: Unique Att Spec ]
8512 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008513 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008514 *
8515 * With namespace:
8516 *
8517 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8518 *
8519 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8520 *
8521 * Returns the element name parsed
8522 */
8523
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008524const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008525xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008526 const xmlChar *name;
8527 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008528 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008529 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008530 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008531 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008532 int i;
8533
8534 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008535 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008536
8537 name = xmlParseName(ctxt);
8538 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008539 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008540 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008541 return(NULL);
8542 }
8543
8544 /*
8545 * Now parse the attributes, it ends up with the ending
8546 *
8547 * (S Attribute)* S?
8548 */
8549 SKIP_BLANKS;
8550 GROW;
8551
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008552 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008553 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008554 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008555 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008556 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008557
8558 attname = xmlParseAttribute(ctxt, &attvalue);
8559 if ((attname != NULL) && (attvalue != NULL)) {
8560 /*
8561 * [ WFC: Unique Att Spec ]
8562 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008563 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008564 */
8565 for (i = 0; i < nbatts;i += 2) {
8566 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008567 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008568 xmlFree(attvalue);
8569 goto failed;
8570 }
8571 }
Owen Taylor3473f882001-02-23 17:55:21 +00008572 /*
8573 * Add the pair to atts
8574 */
8575 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008576 maxatts = 22; /* allow for 10 attrs by default */
8577 atts = (const xmlChar **)
8578 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008579 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008580 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008581 if (attvalue != NULL)
8582 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008583 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008584 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008585 ctxt->atts = atts;
8586 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008587 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008588 const xmlChar **n;
8589
Owen Taylor3473f882001-02-23 17:55:21 +00008590 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008591 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008592 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008593 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008594 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008595 if (attvalue != NULL)
8596 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008597 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008598 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008599 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008600 ctxt->atts = atts;
8601 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008602 }
8603 atts[nbatts++] = attname;
8604 atts[nbatts++] = attvalue;
8605 atts[nbatts] = NULL;
8606 atts[nbatts + 1] = NULL;
8607 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008608 if (attvalue != NULL)
8609 xmlFree(attvalue);
8610 }
8611
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008612failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008613
Daniel Veillard3772de32002-12-17 10:31:45 +00008614 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008615 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8616 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02008617 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008618 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8619 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008620 }
Daniel Veillard02111c12003-02-24 19:14:52 +00008621 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8622 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008623 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8624 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008625 break;
8626 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008627 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008628 GROW;
8629 }
8630
8631 /*
8632 * SAX: Start of Element !
8633 */
8634 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008635 (!ctxt->disableSAX)) {
8636 if (nbatts > 0)
8637 ctxt->sax->startElement(ctxt->userData, name, atts);
8638 else
8639 ctxt->sax->startElement(ctxt->userData, name, NULL);
8640 }
Owen Taylor3473f882001-02-23 17:55:21 +00008641
8642 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008643 /* Free only the content strings */
8644 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008645 if (atts[i] != NULL)
8646 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008647 }
8648 return(name);
8649}
8650
8651/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008652 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008653 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008654 * @line: line of the start tag
8655 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008656 *
8657 * parse an end of tag
8658 *
8659 * [42] ETag ::= '</' Name S? '>'
8660 *
8661 * With namespace
8662 *
8663 * [NS 9] ETag ::= '</' QName S? '>'
8664 */
8665
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008666static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008667xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008668 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008669
8670 GROW;
8671 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008672 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008673 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008674 return;
8675 }
8676 SKIP(2);
8677
Daniel Veillard46de64e2002-05-29 08:21:33 +00008678 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008679
8680 /*
8681 * We should definitely be at the ending "S? '>'" part
8682 */
8683 GROW;
8684 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008685 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008686 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008687 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008688 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008689
8690 /*
8691 * [ WFC: Element Type Match ]
8692 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008693 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008694 *
8695 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008696 if (name != (xmlChar*)1) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008697 if (name == NULL) name = BAD_CAST "unparsable";
Daniel Veillardf403d292003-10-05 13:51:35 +00008698 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008699 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008700 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008701 }
8702
8703 /*
8704 * SAX: End of Tag
8705 */
8706 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8707 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008708 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008709
Daniel Veillarde57ec792003-09-10 10:50:59 +00008710 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008711 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008712 return;
8713}
8714
/**
 * xmlParseEndTag:
 * @ctxt:  an XML parser context
 *
 * parse an end of tag
 *
 * This is a thin wrapper around xmlParseEndTag1() which passes 0 as the
 * line of the start tag.
 *
 * [42] ETag ::= '</' Name S? '>'
 *
 * With namespace
 *
 * [NS 9] ETag ::= '</' QName S? '>'
 */

void
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
    /* The line of the start tag is not known here, hence 0. */
    xmlParseEndTag1(ctxt, 0);
}
Daniel Veillard81273902003-09-30 00:43:48 +00008732#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008733
8734/************************************************************************
8735 * *
8736 * SAX 2 specific operations *
8737 * *
8738 ************************************************************************/
8739
Daniel Veillard0fb18932003-09-07 09:14:37 +00008740/*
8741 * xmlGetNamespace:
8742 * @ctxt: an XML parser context
8743 * @prefix: the prefix to lookup
8744 *
8745 * Lookup the namespace name for the @prefix (which ca be NULL)
Jan Pokornýbb654fe2016-04-13 16:56:07 +02008746 * The prefix must come from the @ctxt->dict dictionary
Daniel Veillard0fb18932003-09-07 09:14:37 +00008747 *
8748 * Returns the namespace name or NULL if not bound
8749 */
8750static const xmlChar *
8751xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8752 int i;
8753
Daniel Veillarde57ec792003-09-10 10:50:59 +00008754 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008755 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008756 if (ctxt->nsTab[i] == prefix) {
8757 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8758 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008759 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008760 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008761 return(NULL);
8762}
8763
/**
 * xmlParseQName:
 * @ctxt:  an XML parser context
 * @prefix:  pointer to store the prefix part
 *
 * parse an XML Namespace QName
 *
 * [6]  QName  ::= (Prefix ':')? LocalPart
 * [7]  Prefix  ::= NCName
 * [8]  LocalPart  ::= NCName
 *
 * Malformed names (leading ':', trailing ':', more than one ':') raise
 * an XML_NS_ERR_QNAME error; the function then tries to recover by
 * returning the whole text as a single name.
 *
 * Returns the Name parsed or NULL. *prefix receives the prefix, or NULL
 * when there is none or when the name had to be recovered as a whole;
 * it is not written on the paths that return NULL directly.
 */

static const xmlChar *
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
    const xmlChar *l, *p;

    GROW;

    l = xmlParseNCName(ctxt);
    if (l == NULL) {
        /*
         * Not an NCName. If the input is on a ':', retry with the more
         * permissive Name production and report the name as unprefixed.
         */
        if (CUR == ':') {
            l = xmlParseName(ctxt);
            if (l != NULL) {
                xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                         "Failed to parse QName '%s'\n", l, NULL, NULL);
                *prefix = NULL;
                return(l);
            }
        }
        return(NULL);
    }
    if (CUR == ':') {
        /* What was parsed so far is the prefix; now read the local part. */
        NEXT;
        p = l;
        l = xmlParseNCName(ctxt);
        if (l == NULL) {
            xmlChar *tmp;

            /* No valid local part after "prefix:". */
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                     "Failed to parse QName '%s:'\n", p, NULL, NULL);
            /*
             * Recovery: take whatever name token follows (possibly
             * nothing) and rebuild a single name out of p and it.
             */
            l = xmlParseNmtoken(ctxt);
            if (l == NULL) {
                if (ctxt->instate == XML_PARSER_EOF)
                    return(NULL);
                tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
            } else {
                tmp = xmlBuildQName(l, p, NULL, 0);
                /* The token is released once the name has been built. */
                xmlFree((char *)l);
            }
            /* Return the dictionary copy and drop the temporary. */
            p = xmlDictLookup(ctxt->dict, tmp, -1);
            if (tmp != NULL) xmlFree(tmp);
            *prefix = NULL;
            return(p);
        }
        if (CUR == ':') {
            xmlChar *tmp;

            /* A second ':' follows "prefix:local". */
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
                     "Failed to parse QName '%s:%s:'\n", p, l, NULL);
            NEXT;
            /*
             * Recovery: keep p as the prefix and fold the rest of the
             * name into the local part.
             */
            tmp = (xmlChar *) xmlParseName(ctxt);
            if (tmp != NULL) {
                tmp = xmlBuildQName(tmp, l, NULL, 0);
                l = xmlDictLookup(ctxt->dict, tmp, -1);
                if (tmp != NULL) xmlFree(tmp);
                *prefix = p;
                return(l);
            }
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
            /* Nothing after the second ':': pair l with an empty name. */
            tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
            l = xmlDictLookup(ctxt->dict, tmp, -1);
            if (tmp != NULL) xmlFree(tmp);
            *prefix = p;
            return(l);
        }
        /* Regular "prefix:local" form. */
        *prefix = p;
    } else
        /* No ':' after the name: unprefixed. */
        *prefix = NULL;
    return(l);
}
8849
8850/**
8851 * xmlParseQNameAndCompare:
8852 * @ctxt: an XML parser context
8853 * @name: the localname
8854 * @prefix: the prefix, if any.
8855 *
8856 * parse an XML name and compares for match
8857 * (specialized for endtag parsing)
8858 *
8859 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8860 * and the name for mismatch
8861 */
8862
8863static const xmlChar *
8864xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8865 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008866 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008867 const xmlChar *in;
8868 const xmlChar *ret;
8869 const xmlChar *prefix2;
8870
8871 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8872
8873 GROW;
8874 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008875
Daniel Veillard0fb18932003-09-07 09:14:37 +00008876 cmp = prefix;
8877 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008878 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008879 ++cmp;
8880 }
8881 if ((*cmp == 0) && (*in == ':')) {
8882 in++;
8883 cmp = name;
8884 while (*in != 0 && *in == *cmp) {
8885 ++in;
8886 ++cmp;
8887 }
William M. Brack76e95df2003-10-18 16:20:14 +00008888 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008889 /* success */
Haibo Huangf0a546b2020-09-01 20:28:19 -07008890 ctxt->input->col += in - ctxt->input->cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008891 ctxt->input->cur = in;
8892 return((const xmlChar*) 1);
8893 }
8894 }
8895 /*
8896 * all strings coms from the dictionary, equality can be done directly
8897 */
8898 ret = xmlParseQName (ctxt, &prefix2);
8899 if ((ret == name) && (prefix == prefix2))
8900 return((const xmlChar*) 1);
8901 return ret;
8902}
8903
8904/**
8905 * xmlParseAttValueInternal:
8906 * @ctxt: an XML parser context
8907 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008908 * @alloc: whether the attribute was reallocated as a new string
8909 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008910 *
8911 * parse a value for an attribute.
8912 * NOTE: if no normalization is needed, the routine will return pointers
8913 * directly from the data buffer.
8914 *
8915 * 3.3.3 Attribute-Value Normalization:
8916 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008917 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008918 * - a character reference is processed by appending the referenced
8919 * character to the attribute value
8920 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008921 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008922 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8923 * appending #x20 to the normalized value, except that only a single
8924 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008925 * parsed entity or the literal entity value of an internal parsed entity
8926 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008927 * If the declared value is not CDATA, then the XML processor must further
8928 * process the normalized attribute value by discarding any leading and
8929 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008930 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008931 * All attributes for which no declaration has been read should be treated
8932 * by a non-validating parser as if declared CDATA.
8933 *
8934 * Returns the AttValue parsed or NULL. The value has to be freed by the
8935 * caller if it was copied, this can be detected by val[*len] == 0.
8936 */
8937
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008938#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8939 const xmlChar *oldbase = ctxt->input->base;\
8940 GROW;\
8941 if (ctxt->instate == XML_PARSER_EOF)\
8942 return(NULL);\
8943 if (oldbase != ctxt->input->base) {\
8944 ptrdiff_t delta = ctxt->input->base - oldbase;\
8945 start = start + delta;\
8946 in = in + delta;\
8947 }\
8948 end = ctxt->input->end;
8949
Daniel Veillard0fb18932003-09-07 09:14:37 +00008950static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008951xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8952 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008953{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008954 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008955 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008956 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08008957 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008958
8959 GROW;
8960 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08008961 line = ctxt->input->line;
8962 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008963 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008964 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008965 return (NULL);
8966 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008967 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008968
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008969 /*
8970 * try to handle in this routine the most common case where no
8971 * allocation of a new string is required and where content is
8972 * pure ASCII.
8973 */
8974 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008975 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008976 end = ctxt->input->end;
8977 start = in;
8978 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008979 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Daniel Veillard0fb18932003-09-07 09:14:37 +00008980 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008981 if (normalize) {
8982 /*
8983 * Skip any leading spaces
8984 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008985 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008986 ((*in == 0x20) || (*in == 0x9) ||
8987 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008988 if (*in == 0xA) {
8989 line++; col = 1;
8990 } else {
8991 col++;
8992 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008993 in++;
8994 start = in;
8995 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008996 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Daniel Veillarde17db992012-07-19 11:25:16 +08008997 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8998 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8999 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009000 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009001 return(NULL);
9002 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009003 }
9004 }
9005 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9006 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009007 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009008 if ((*in++ == 0x20) && (*in == 0x20)) break;
9009 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009010 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Daniel Veillarde17db992012-07-19 11:25:16 +08009011 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9012 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9013 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009014 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009015 return(NULL);
9016 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009017 }
9018 }
9019 last = in;
9020 /*
9021 * skip the trailing blanks
9022 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00009023 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009024 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009025 ((*in == 0x20) || (*in == 0x9) ||
9026 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009027 if (*in == 0xA) {
9028 line++, col = 1;
9029 } else {
9030 col++;
9031 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009032 in++;
9033 if (in >= end) {
9034 const xmlChar *oldbase = ctxt->input->base;
9035 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009036 if (ctxt->instate == XML_PARSER_EOF)
9037 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009038 if (oldbase != ctxt->input->base) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009039 ptrdiff_t delta = ctxt->input->base - oldbase;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009040 start = start + delta;
9041 in = in + delta;
9042 last = last + delta;
9043 }
9044 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009045 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9046 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9047 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009048 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009049 return(NULL);
9050 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009051 }
9052 }
Daniel Veillarde17db992012-07-19 11:25:16 +08009053 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9054 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9055 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009056 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009057 return(NULL);
9058 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009059 if (*in != limit) goto need_complex;
9060 } else {
9061 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9062 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9063 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009064 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009065 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009066 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Daniel Veillarde17db992012-07-19 11:25:16 +08009067 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9068 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9069 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009070 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009071 return(NULL);
9072 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009073 }
9074 }
9075 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009076 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9077 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9078 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009079 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009080 return(NULL);
9081 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009082 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009083 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009084 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009085 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009086 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009087 *len = last - start;
9088 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009089 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009090 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009091 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009092 }
9093 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009094 ctxt->input->line = line;
9095 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009096 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009097 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009098need_complex:
9099 if (alloc) *alloc = 1;
9100 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009101}
9102
9103/**
9104 * xmlParseAttribute2:
9105 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009106 * @pref: the element prefix
9107 * @elem: the element name
9108 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009109 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009110 * @len: an int * to save the length of the attribute
9111 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009112 *
9113 * parse an attribute in the new SAX2 framework.
9114 *
9115 * Returns the attribute name, and the value in *value, .
9116 */
9117
9118static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009119xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009120 const xmlChar * pref, const xmlChar * elem,
9121 const xmlChar ** prefix, xmlChar ** value,
9122 int *len, int *alloc)
9123{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009124 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009125 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009126 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009127
9128 *value = NULL;
9129 GROW;
9130 name = xmlParseQName(ctxt, prefix);
9131 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009132 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9133 "error parsing attribute name\n");
9134 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009135 }
9136
9137 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009138 * get the type if needed
9139 */
9140 if (ctxt->attsSpecial != NULL) {
9141 int type;
9142
Nick Wellnhoferd422b952017-10-09 13:37:42 +02009143 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9144 pref, elem, *prefix, name);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009145 if (type != 0)
9146 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009147 }
9148
9149 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009150 * read the value
9151 */
9152 SKIP_BLANKS;
9153 if (RAW == '=') {
9154 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009155 SKIP_BLANKS;
9156 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9157 if (normalize) {
9158 /*
9159 * Sometimes a second normalisation pass for spaces is needed
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009160 * but that only happens if charrefs or entities references
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009161 * have been used in the attribute value, i.e. the attribute
9162 * value have been extracted in an allocated string already.
9163 */
9164 if (*alloc) {
9165 const xmlChar *val2;
9166
9167 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009168 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009169 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009170 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009171 }
9172 }
9173 }
9174 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009175 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009176 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02009177 "Specification mandates value for attribute %s\n",
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009178 name);
9179 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009180 }
9181
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009182 if (*prefix == ctxt->str_xml) {
9183 /*
9184 * Check that xml:lang conforms to the specification
9185 * No more registered as an error, just generate a warning now
9186 * since this was deprecated in XML second edition
9187 */
9188 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9189 internal_val = xmlStrndup(val, *len);
9190 if (!xmlCheckLanguageID(internal_val)) {
9191 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9192 "Malformed value for xml:lang : %s\n",
9193 internal_val, NULL);
9194 }
9195 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009196
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009197 /*
9198 * Check that xml:space conforms to the specification
9199 */
9200 if (xmlStrEqual(name, BAD_CAST "space")) {
9201 internal_val = xmlStrndup(val, *len);
9202 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9203 *(ctxt->space) = 0;
9204 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9205 *(ctxt->space) = 1;
9206 else {
9207 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9208 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9209 internal_val, NULL);
9210 }
9211 }
9212 if (internal_val) {
9213 xmlFree(internal_val);
9214 }
9215 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009216
9217 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009218 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009219}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009220/**
9221 * xmlParseStartTag2:
9222 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009223 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009224 * parse a start of tag either for rule element or
9225 * EmptyElement. In both case we don't parse the tag closing chars.
9226 * This routine is called when running SAX2 parsing
9227 *
9228 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9229 *
9230 * [ WFC: Unique Att Spec ]
9231 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009232 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009233 *
9234 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9235 *
9236 * [ WFC: Unique Att Spec ]
9237 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009238 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009239 *
9240 * With namespace:
9241 *
9242 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9243 *
9244 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9245 *
9246 * Returns the element name parsed
9247 */
9248
9249static const xmlChar *
9250xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009251 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009252 const xmlChar *localname;
9253 const xmlChar *prefix;
9254 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009255 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009256 const xmlChar *nsname;
9257 xmlChar *attvalue;
9258 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009259 int maxatts = ctxt->maxatts;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009260 int nratts, nbatts, nbdef, inputid;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009261 int i, j, nbNs, attval;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009262 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009263 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009264
9265 if (RAW != '<') return(NULL);
9266 NEXT1;
9267
9268 /*
9269 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9270 * point since the attribute values may be stored as pointers to
9271 * the buffer and calling SHRINK would destroy them !
9272 * The Shrinking is only possible once the full set of attribute
9273 * callbacks have been done.
9274 */
9275 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009276 cur = ctxt->input->cur - ctxt->input->base;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009277 inputid = ctxt->input->id;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009278 nbatts = 0;
9279 nratts = 0;
9280 nbdef = 0;
9281 nbNs = 0;
9282 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009283 /* Forget any namespaces added during an earlier parse of this element. */
9284 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009285
9286 localname = xmlParseQName(ctxt, &prefix);
9287 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009288 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9289 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009290 return(NULL);
9291 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009292 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009293
9294 /*
9295 * Now parse the attributes, it ends up with the ending
9296 *
9297 * (S Attribute)* S?
9298 */
9299 SKIP_BLANKS;
9300 GROW;
9301
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009302 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009303 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009304 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009305 const xmlChar *q = CUR_PTR;
9306 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009307 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009308
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009309 attname = xmlParseAttribute2(ctxt, prefix, localname,
9310 &aprefix, &attvalue, &len, &alloc);
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009311 if ((attname == NULL) || (attvalue == NULL))
9312 goto next_attr;
9313 if (len < 0) len = xmlStrlen(attvalue);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009314
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009315 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9316 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9317 xmlURIPtr uri;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009318
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009319 if (URL == NULL) {
9320 xmlErrMemory(ctxt, "dictionary allocation failure");
9321 if ((attvalue != NULL) && (alloc != 0))
9322 xmlFree(attvalue);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009323 localname = NULL;
9324 goto done;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009325 }
9326 if (*URL != 0) {
9327 uri = xmlParseURI((const char *) URL);
9328 if (uri == NULL) {
9329 xmlNsErr(ctxt, XML_WAR_NS_URI,
9330 "xmlns: '%s' is not a valid URI\n",
9331 URL, NULL, NULL);
9332 } else {
9333 if (uri->scheme == NULL) {
9334 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9335 "xmlns: URI %s is not absolute\n",
9336 URL, NULL, NULL);
9337 }
9338 xmlFreeURI(uri);
9339 }
Daniel Veillard37334572008-07-31 08:20:02 +00009340 if (URL == ctxt->str_xml_ns) {
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009341 if (attname != ctxt->str_xml) {
9342 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9343 "xml namespace URI cannot be the default namespace\n",
9344 NULL, NULL, NULL);
9345 }
9346 goto next_attr;
9347 }
9348 if ((len == 29) &&
9349 (xmlStrEqual(URL,
9350 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9351 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9352 "reuse of the xmlns namespace name is forbidden\n",
9353 NULL, NULL, NULL);
9354 goto next_attr;
9355 }
9356 }
9357 /*
9358 * check that it's not a defined namespace
9359 */
9360 for (j = 1;j <= nbNs;j++)
9361 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9362 break;
9363 if (j <= nbNs)
9364 xmlErrAttributeDup(ctxt, NULL, attname);
9365 else
9366 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009367
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009368 } else if (aprefix == ctxt->str_xmlns) {
9369 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9370 xmlURIPtr uri;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009371
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009372 if (attname == ctxt->str_xml) {
9373 if (URL != ctxt->str_xml_ns) {
9374 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9375 "xml namespace prefix mapped to wrong URI\n",
9376 NULL, NULL, NULL);
9377 }
9378 /*
9379 * Do not keep a namespace definition node
9380 */
9381 goto next_attr;
9382 }
9383 if (URL == ctxt->str_xml_ns) {
9384 if (attname != ctxt->str_xml) {
9385 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9386 "xml namespace URI mapped to wrong prefix\n",
9387 NULL, NULL, NULL);
9388 }
9389 goto next_attr;
9390 }
9391 if (attname == ctxt->str_xmlns) {
9392 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9393 "redefinition of the xmlns prefix is forbidden\n",
9394 NULL, NULL, NULL);
9395 goto next_attr;
9396 }
9397 if ((len == 29) &&
9398 (xmlStrEqual(URL,
9399 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9400 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9401 "reuse of the xmlns namespace name is forbidden\n",
9402 NULL, NULL, NULL);
9403 goto next_attr;
9404 }
9405 if ((URL == NULL) || (URL[0] == 0)) {
9406 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9407 "xmlns:%s: Empty XML namespace is not allowed\n",
9408 attname, NULL, NULL);
9409 goto next_attr;
9410 } else {
9411 uri = xmlParseURI((const char *) URL);
9412 if (uri == NULL) {
9413 xmlNsErr(ctxt, XML_WAR_NS_URI,
9414 "xmlns:%s: '%s' is not a valid URI\n",
9415 attname, URL, NULL);
9416 } else {
9417 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9418 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9419 "xmlns:%s: URI %s is not absolute\n",
9420 attname, URL, NULL);
9421 }
9422 xmlFreeURI(uri);
9423 }
9424 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009425
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009426 /*
9427 * check that it's not a defined namespace
9428 */
9429 for (j = 1;j <= nbNs;j++)
9430 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9431 break;
9432 if (j <= nbNs)
9433 xmlErrAttributeDup(ctxt, aprefix, attname);
9434 else
9435 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9436
9437 } else {
9438 /*
9439 * Add the pair to atts
9440 */
9441 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9442 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9443 goto next_attr;
9444 }
9445 maxatts = ctxt->maxatts;
9446 atts = ctxt->atts;
9447 }
9448 ctxt->attallocs[nratts++] = alloc;
9449 atts[nbatts++] = attname;
9450 atts[nbatts++] = aprefix;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009451 /*
9452 * The namespace URI field is used temporarily to point at the
9453 * base of the current input buffer for non-alloced attributes.
9454 * When the input buffer is reallocated, all the pointers become
9455 * invalid, but they can be reconstructed later.
9456 */
9457 if (alloc)
9458 atts[nbatts++] = NULL;
9459 else
9460 atts[nbatts++] = ctxt->input->base;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009461 atts[nbatts++] = attvalue;
9462 attvalue += len;
9463 atts[nbatts++] = attvalue;
9464 /*
9465 * tag if some deallocation is needed
9466 */
9467 if (alloc != 0) attval = 1;
9468 attvalue = NULL; /* moved into atts */
9469 }
9470
9471next_attr:
9472 if ((attvalue != NULL) && (alloc != 0)) {
9473 xmlFree(attvalue);
9474 attvalue = NULL;
9475 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009476
9477 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009478 if (ctxt->instate == XML_PARSER_EOF)
9479 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009480 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9481 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02009482 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009483 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9484 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009485 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009486 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009487 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9488 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009489 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009490 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009491 break;
9492 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009493 GROW;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009494 }
9495
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009496 if (ctxt->input->id != inputid) {
9497 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9498 "Unexpected change of input\n");
9499 localname = NULL;
9500 goto done;
9501 }
9502
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009503 /* Reconstruct attribute value pointers. */
9504 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9505 if (atts[i+2] != NULL) {
9506 /*
9507 * Arithmetic on dangling pointers is technically undefined
9508 * behavior, but well...
9509 */
9510 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9511 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9512 atts[i+3] += offset; /* value */
9513 atts[i+4] += offset; /* valuend */
9514 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009515 }
9516
Daniel Veillard0fb18932003-09-07 09:14:37 +00009517 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009518 * The attributes defaulting
9519 */
9520 if (ctxt->attsDefault != NULL) {
9521 xmlDefAttrsPtr defaults;
9522
9523 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9524 if (defaults != NULL) {
9525 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009526 attname = defaults->values[5 * i];
9527 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009528
9529 /*
9530 * special work for namespaces defaulted defs
9531 */
9532 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9533 /*
9534 * check that it's not a defined namespace
9535 */
9536 for (j = 1;j <= nbNs;j++)
9537 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9538 break;
9539 if (j <= nbNs) continue;
9540
9541 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009542 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009543 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009544 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009545 nbNs++;
9546 }
9547 } else if (aprefix == ctxt->str_xmlns) {
9548 /*
9549 * check that it's not a defined namespace
9550 */
9551 for (j = 1;j <= nbNs;j++)
9552 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9553 break;
9554 if (j <= nbNs) continue;
9555
9556 nsname = xmlGetNamespace(ctxt, attname);
9557 if (nsname != defaults->values[2]) {
9558 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009559 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009560 nbNs++;
9561 }
9562 } else {
9563 /*
9564 * check that it's not a defined attribute
9565 */
9566 for (j = 0;j < nbatts;j+=5) {
9567 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9568 break;
9569 }
9570 if (j < nbatts) continue;
9571
9572 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9573 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009574 localname = NULL;
9575 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009576 }
9577 maxatts = ctxt->maxatts;
9578 atts = ctxt->atts;
9579 }
9580 atts[nbatts++] = attname;
9581 atts[nbatts++] = aprefix;
9582 if (aprefix == NULL)
9583 atts[nbatts++] = NULL;
9584 else
9585 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009586 atts[nbatts++] = defaults->values[5 * i + 2];
9587 atts[nbatts++] = defaults->values[5 * i + 3];
9588 if ((ctxt->standalone == 1) &&
9589 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009590 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009591 "standalone: attribute %s on %s defaulted from external subset\n",
9592 attname, localname);
9593 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009594 nbdef++;
9595 }
9596 }
9597 }
9598 }
9599
Daniel Veillarde70c8772003-11-25 07:21:18 +00009600 /*
9601 * The attributes checkings
9602 */
9603 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009604 /*
9605 * The default namespace does not apply to attribute names.
9606 */
9607 if (atts[i + 1] != NULL) {
9608 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9609 if (nsname == NULL) {
9610 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9611 "Namespace prefix %s for %s on %s is not defined\n",
9612 atts[i + 1], atts[i], localname);
9613 }
9614 atts[i + 2] = nsname;
9615 } else
9616 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009617 /*
9618 * [ WFC: Unique Att Spec ]
9619 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009620 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009621 * As extended by the Namespace in XML REC.
9622 */
9623 for (j = 0; j < i;j += 5) {
9624 if (atts[i] == atts[j]) {
9625 if (atts[i+1] == atts[j+1]) {
9626 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9627 break;
9628 }
9629 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9630 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9631 "Namespaced Attribute %s in '%s' redefined\n",
9632 atts[i], nsname, NULL);
9633 break;
9634 }
9635 }
9636 }
9637 }
9638
Daniel Veillarde57ec792003-09-10 10:50:59 +00009639 nsname = xmlGetNamespace(ctxt, prefix);
9640 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009641 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9642 "Namespace prefix %s on %s is not defined\n",
9643 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009644 }
9645 *pref = prefix;
9646 *URI = nsname;
9647
9648 /*
9649 * SAX: Start of Element !
9650 */
9651 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9652 (!ctxt->disableSAX)) {
9653 if (nbNs > 0)
9654 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9655 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9656 nbatts / 5, nbdef, atts);
9657 else
9658 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9659 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9660 }
9661
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009662done:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009663 /*
9664 * Free up attribute allocated strings if needed
9665 */
9666 if (attval != 0) {
9667 for (i = 3,j = 0; j < nratts;i += 5,j++)
9668 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9669 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009670 }
9671
9672 return(localname);
9673}
9674
9675/**
9676 * xmlParseEndTag2:
9677 * @ctxt: an XML parser context
9678 * @line: line of the start tag
9679 * @nsNr: number of namespaces on the start tag
9680 *
9681 * parse an end of tag
9682 *
9683 * [42] ETag ::= '</' Name S? '>'
9684 *
9685 * With namespace
9686 *
9687 * [NS 9] ETag ::= '</' QName S? '>'
9688 */
9689
9690static void
Elliott Hughese54f00d2021-05-13 08:13:46 -07009691xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009692 const xmlChar *name;
9693
9694 GROW;
9695 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009696 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009697 return;
9698 }
9699 SKIP(2);
9700
Elliott Hughese54f00d2021-05-13 08:13:46 -07009701 if (tag->prefix == NULL)
9702 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9703 else
9704 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009705
9706 /*
9707 * We should definitely be at the ending "S? '>'" part
9708 */
9709 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009710 if (ctxt->instate == XML_PARSER_EOF)
9711 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009712 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009713 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009714 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009715 } else
9716 NEXT1;
9717
9718 /*
9719 * [ WFC: Element Type Match ]
9720 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009721 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009722 *
9723 */
9724 if (name != (xmlChar*)1) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009725 if (name == NULL) name = BAD_CAST "unparsable";
Daniel Veillardf403d292003-10-05 13:51:35 +00009726 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009727 "Opening and ending tag mismatch: %s line %d and %s\n",
Elliott Hughese54f00d2021-05-13 08:13:46 -07009728 ctxt->name, tag->line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009729 }
9730
9731 /*
9732 * SAX: End of Tag
9733 */
9734 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9735 (!ctxt->disableSAX))
Elliott Hughese54f00d2021-05-13 08:13:46 -07009736 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9737 tag->URI);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009738
Daniel Veillard0fb18932003-09-07 09:14:37 +00009739 spacePop(ctxt);
Elliott Hughese54f00d2021-05-13 08:13:46 -07009740 if (tag->nsNr != 0)
9741 nsPop(ctxt, tag->nsNr);
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009742}
9743
9744/**
Owen Taylor3473f882001-02-23 17:55:21 +00009745 * xmlParseCDSect:
9746 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009747 *
Owen Taylor3473f882001-02-23 17:55:21 +00009748 * Parse escaped pure raw content.
9749 *
9750 * [18] CDSect ::= CDStart CData CDEnd
9751 *
9752 * [19] CDStart ::= '<![CDATA['
9753 *
9754 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9755 *
9756 * [21] CDEnd ::= ']]>'
9757 */
9758void
9759xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9760 xmlChar *buf = NULL;
9761 int len = 0;
9762 int size = XML_PARSER_BUFFER_SIZE;
9763 int r, rl;
9764 int s, sl;
9765 int cur, l;
9766 int count = 0;
9767
Daniel Veillard8f597c32003-10-06 08:19:27 +00009768 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009769 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009770 SKIP(9);
9771 } else
9772 return;
9773
9774 ctxt->instate = XML_PARSER_CDATA_SECTION;
9775 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009776 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009777 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009778 ctxt->instate = XML_PARSER_CONTENT;
9779 return;
9780 }
9781 NEXTL(rl);
9782 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009783 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009784 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009785 ctxt->instate = XML_PARSER_CONTENT;
9786 return;
9787 }
9788 NEXTL(sl);
9789 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009790 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009791 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009792 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009793 return;
9794 }
William M. Brack871611b2003-10-18 04:53:14 +00009795 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009796 ((r != ']') || (s != ']') || (cur != '>'))) {
9797 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009798 xmlChar *tmp;
9799
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009800 if ((size > XML_MAX_TEXT_LENGTH) &&
9801 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9802 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9803 "CData section too big found", NULL);
9804 xmlFree (buf);
9805 return;
9806 }
9807 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009808 if (tmp == NULL) {
9809 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009810 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009811 return;
9812 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009813 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009814 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009815 }
9816 COPY_BUF(rl,buf,len,r);
9817 r = s;
9818 rl = sl;
9819 s = cur;
9820 sl = l;
9821 count++;
9822 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08009823 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00009824 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009825 if (ctxt->instate == XML_PARSER_EOF) {
9826 xmlFree(buf);
9827 return;
9828 }
Owen Taylor3473f882001-02-23 17:55:21 +00009829 count = 0;
9830 }
9831 NEXTL(l);
9832 cur = CUR_CHAR(l);
9833 }
9834 buf[len] = 0;
9835 ctxt->instate = XML_PARSER_CONTENT;
9836 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009837 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009838 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009839 xmlFree(buf);
9840 return;
9841 }
9842 NEXTL(l);
9843
9844 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009845 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009846 */
9847 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9848 if (ctxt->sax->cdataBlock != NULL)
9849 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009850 else if (ctxt->sax->characters != NULL)
9851 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009852 }
9853 xmlFree(buf);
9854}
9855
9856/**
Elliott Hughese54f00d2021-05-13 08:13:46 -07009857 * xmlParseContentInternal:
Owen Taylor3473f882001-02-23 17:55:21 +00009858 * @ctxt: an XML parser context
9859 *
Elliott Hughese54f00d2021-05-13 08:13:46 -07009860 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9861 * unexpected EOF to the caller.
Owen Taylor3473f882001-02-23 17:55:21 +00009862 */
9863
Elliott Hughese54f00d2021-05-13 08:13:46 -07009864static void
9865xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009866 int nameNr = ctxt->nameNr;
9867
Owen Taylor3473f882001-02-23 17:55:21 +00009868 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009869 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009870 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009871 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009872 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009873 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009874
9875 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009876 * First case : a Processing Instruction.
9877 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009878 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009879 xmlParsePI(ctxt);
9880 }
9881
9882 /*
9883 * Second case : a CDSection
9884 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009885 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009886 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009887 xmlParseCDSect(ctxt);
9888 }
9889
9890 /*
9891 * Third case : a comment
9892 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009893 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009894 (NXT(2) == '-') && (NXT(3) == '-')) {
9895 xmlParseComment(ctxt);
9896 ctxt->instate = XML_PARSER_CONTENT;
9897 }
9898
9899 /*
9900 * Fourth case : a sub-element.
9901 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009902 else if (*cur == '<') {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009903 if (NXT(1) == '/') {
9904 if (ctxt->nameNr <= nameNr)
9905 break;
9906 xmlParseElementEnd(ctxt);
9907 } else {
9908 xmlParseElementStart(ctxt);
9909 }
Owen Taylor3473f882001-02-23 17:55:21 +00009910 }
9911
9912 /*
9913 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009914 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009915 */
9916
Daniel Veillard21a0f912001-02-25 19:54:14 +00009917 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009918 xmlParseReference(ctxt);
9919 }
9920
9921 /*
9922 * Last case, text. Note that References are handled directly.
9923 */
9924 else {
9925 xmlParseCharData(ctxt, 0);
9926 }
9927
9928 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00009929 SHRINK;
9930
Daniel Veillardfdc91562002-07-01 21:52:03 +00009931 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009932 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9933 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +08009934 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009935 break;
9936 }
9937 }
9938}
9939
9940/**
Elliott Hughese54f00d2021-05-13 08:13:46 -07009941 * xmlParseContent:
9942 * @ctxt: an XML parser context
9943 *
9944 * Parse a content sequence. Stops at EOF or '</'.
9945 *
9946 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9947 */
9948
9949void
9950xmlParseContent(xmlParserCtxtPtr ctxt) {
9951 int nameNr = ctxt->nameNr;
9952
9953 xmlParseContentInternal(ctxt);
9954
9955 if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9956 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9957 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9958 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9959 "Premature end of data in tag %s line %d\n",
9960 name, line, NULL);
9961 }
9962}
9963
9964/**
Owen Taylor3473f882001-02-23 17:55:21 +00009965 * xmlParseElement:
9966 * @ctxt: an XML parser context
9967 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009968 * parse an XML element
Owen Taylor3473f882001-02-23 17:55:21 +00009969 *
9970 * [39] element ::= EmptyElemTag | STag content ETag
9971 *
9972 * [ WFC: Element Type Match ]
9973 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009974 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009975 *
Owen Taylor3473f882001-02-23 17:55:21 +00009976 */
9977
9978void
9979xmlParseElement(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009980 if (xmlParseElementStart(ctxt) != 0)
9981 return;
Elliott Hughese54f00d2021-05-13 08:13:46 -07009982
9983 xmlParseContentInternal(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009984 if (ctxt->instate == XML_PARSER_EOF)
9985 return;
Elliott Hughese54f00d2021-05-13 08:13:46 -07009986
9987 if (CUR == 0) {
9988 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9989 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9990 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9991 "Premature end of data in tag %s line %d\n",
9992 name, line, NULL);
9993 return;
9994 }
9995
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009996 xmlParseElementEnd(ctxt);
9997}
9998
9999/**
10000 * xmlParseElementStart:
10001 * @ctxt: an XML parser context
10002 *
10003 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10004 * opening tag was parsed, 1 if an empty element was parsed.
10005 */
10006static int
10007xmlParseElementStart(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +000010008 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010009 const xmlChar *prefix = NULL;
10010 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010011 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +080010012 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010013 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +000010014 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +000010015
Daniel Veillard8915c152008-08-26 13:05:34 +000010016 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10017 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10018 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10019 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10020 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +080010021 xmlHaltParser(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010022 return(-1);
Daniel Veillard4a9fe382006-09-19 12:44:35 +000010023 }
10024
Owen Taylor3473f882001-02-23 17:55:21 +000010025 /* Capture start position */
10026 if (ctxt->record_info) {
10027 node_info.begin_pos = ctxt->input->consumed +
10028 (CUR_PTR - ctxt->input->base);
10029 node_info.begin_line = ctxt->input->line;
10030 }
10031
10032 if (ctxt->spaceNr == 0)
10033 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010034 else if (*ctxt->space == -2)
10035 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +000010036 else
10037 spacePush(ctxt, *ctxt->space);
10038
Daniel Veillard6c5b2d32003-03-27 14:55:52 +000010039 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +000010040#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010041 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010042#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010043 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010044#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010045 else
10046 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010047#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080010048 if (ctxt->instate == XML_PARSER_EOF)
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010049 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010050 if (name == NULL) {
10051 spacePop(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010052 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010053 }
Elliott Hughese54f00d2021-05-13 08:13:46 -070010054 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010055 ret = ctxt->node;
10056
Daniel Veillard4432df22003-09-28 18:58:27 +000010057#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010058 /*
10059 * [ VC: Root Element Type ]
10060 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010061 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +000010062 */
10063 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10064 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10065 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010066#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010067
10068 /*
10069 * Check for an Empty Element.
10070 */
10071 if ((RAW == '/') && (NXT(1) == '>')) {
10072 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010073 if (ctxt->sax2) {
10074 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10075 (!ctxt->disableSAX))
10076 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010077#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010078 } else {
10079 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10080 (!ctxt->disableSAX))
10081 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010082#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010083 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010084 namePop(ctxt);
10085 spacePop(ctxt);
10086 if (nsNr != ctxt->nsNr)
10087 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010088 if ( ret != NULL && ctxt->record_info ) {
10089 node_info.end_pos = ctxt->input->consumed +
10090 (CUR_PTR - ctxt->input->base);
10091 node_info.end_line = ctxt->input->line;
10092 node_info.node = ret;
10093 xmlParserAddNodeInfo(ctxt, &node_info);
10094 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010095 return(1);
Owen Taylor3473f882001-02-23 17:55:21 +000010096 }
10097 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010098 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010099 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010100 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10101 "Couldn't find end of Start Tag %s line %d\n",
10102 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010103
10104 /*
10105 * end of parsing of this node.
10106 */
10107 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010108 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010109 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010110 if (nsNr != ctxt->nsNr)
10111 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010112
10113 /*
10114 * Capture end position and add node
10115 */
10116 if ( ret != NULL && ctxt->record_info ) {
10117 node_info.end_pos = ctxt->input->consumed +
10118 (CUR_PTR - ctxt->input->base);
10119 node_info.end_line = ctxt->input->line;
10120 node_info.node = ret;
10121 xmlParserAddNodeInfo(ctxt, &node_info);
10122 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010123 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010124 }
10125
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010126 return(0);
10127}
Owen Taylor3473f882001-02-23 17:55:21 +000010128
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010129/**
10130 * xmlParseElementEnd:
10131 * @ctxt: an XML parser context
10132 *
10133 * Parse the end of an XML element.
10134 */
10135static void
10136xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10137 xmlParserNodeInfo node_info;
10138 xmlNodePtr ret = ctxt->node;
10139
10140 if (ctxt->nameNr <= 0)
10141 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010142
10143 /*
10144 * parse the end of tag: '</' should be here.
10145 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010146 if (ctxt->sax2) {
Elliott Hughese54f00d2021-05-13 08:13:46 -070010147 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010148 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010149 }
10150#ifdef LIBXML_SAX1_ENABLED
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010151 else
10152 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010153#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010154
10155 /*
10156 * Capture end position and add node
10157 */
10158 if ( ret != NULL && ctxt->record_info ) {
10159 node_info.end_pos = ctxt->input->consumed +
10160 (CUR_PTR - ctxt->input->base);
10161 node_info.end_line = ctxt->input->line;
10162 node_info.node = ret;
10163 xmlParserAddNodeInfo(ctxt, &node_info);
10164 }
10165}
10166
10167/**
10168 * xmlParseVersionNum:
10169 * @ctxt: an XML parser context
10170 *
10171 * parse the XML version value.
10172 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010173 * [26] VersionNum ::= '1.' [0-9]+
10174 *
10175 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010176 *
10177 * Returns the string giving the XML version number, or NULL
10178 */
10179xmlChar *
10180xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10181 xmlChar *buf = NULL;
10182 int len = 0;
10183 int size = 10;
10184 xmlChar cur;
10185
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010186 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010187 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010188 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010189 return(NULL);
10190 }
10191 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010192 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010193 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010194 return(NULL);
10195 }
10196 buf[len++] = cur;
10197 NEXT;
10198 cur=CUR;
10199 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010200 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010201 return(NULL);
10202 }
10203 buf[len++] = cur;
10204 NEXT;
10205 cur=CUR;
10206 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010207 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010208 xmlChar *tmp;
10209
Owen Taylor3473f882001-02-23 17:55:21 +000010210 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010211 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10212 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010213 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010214 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010215 return(NULL);
10216 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010217 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010218 }
10219 buf[len++] = cur;
10220 NEXT;
10221 cur=CUR;
10222 }
10223 buf[len] = 0;
10224 return(buf);
10225}
10226
10227/**
10228 * xmlParseVersionInfo:
10229 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010230 *
Owen Taylor3473f882001-02-23 17:55:21 +000010231 * parse the XML version.
10232 *
10233 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010234 *
Owen Taylor3473f882001-02-23 17:55:21 +000010235 * [25] Eq ::= S? '=' S?
10236 *
10237 * Returns the version string, e.g. "1.0"
10238 */
10239
10240xmlChar *
10241xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10242 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010243
Daniel Veillarda07050d2003-10-19 14:46:32 +000010244 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010245 SKIP(7);
10246 SKIP_BLANKS;
10247 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010248 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010249 return(NULL);
10250 }
10251 NEXT;
10252 SKIP_BLANKS;
10253 if (RAW == '"') {
10254 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010255 version = xmlParseVersionNum(ctxt);
10256 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010257 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010258 } else
10259 NEXT;
10260 } else if (RAW == '\''){
10261 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010262 version = xmlParseVersionNum(ctxt);
10263 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010264 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010265 } else
10266 NEXT;
10267 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010268 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010269 }
10270 }
10271 return(version);
10272}
10273
10274/**
10275 * xmlParseEncName:
10276 * @ctxt: an XML parser context
10277 *
10278 * parse the XML encoding name
10279 *
10280 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10281 *
10282 * Returns the encoding name value or NULL
10283 */
10284xmlChar *
10285xmlParseEncName(xmlParserCtxtPtr ctxt) {
10286 xmlChar *buf = NULL;
10287 int len = 0;
10288 int size = 10;
10289 xmlChar cur;
10290
10291 cur = CUR;
10292 if (((cur >= 'a') && (cur <= 'z')) ||
10293 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010294 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010295 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010296 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010297 return(NULL);
10298 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010299
Owen Taylor3473f882001-02-23 17:55:21 +000010300 buf[len++] = cur;
10301 NEXT;
10302 cur = CUR;
10303 while (((cur >= 'a') && (cur <= 'z')) ||
10304 ((cur >= 'A') && (cur <= 'Z')) ||
10305 ((cur >= '0') && (cur <= '9')) ||
10306 (cur == '.') || (cur == '_') ||
10307 (cur == '-')) {
10308 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010309 xmlChar *tmp;
10310
Owen Taylor3473f882001-02-23 17:55:21 +000010311 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010312 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10313 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010314 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010315 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010316 return(NULL);
10317 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010318 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010319 }
10320 buf[len++] = cur;
10321 NEXT;
10322 cur = CUR;
10323 if (cur == 0) {
10324 SHRINK;
10325 GROW;
10326 cur = CUR;
10327 }
10328 }
10329 buf[len] = 0;
10330 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010331 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010332 }
10333 return(buf);
10334}
10335
10336/**
10337 * xmlParseEncodingDecl:
10338 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010339 *
Owen Taylor3473f882001-02-23 17:55:21 +000010340 * parse the XML encoding declaration
10341 *
10342 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10343 *
10344 * this setups the conversion filters.
10345 *
10346 * Returns the encoding value or NULL
10347 */
10348
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010349const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010350xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10351 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010352
10353 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010354 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010355 SKIP(8);
10356 SKIP_BLANKS;
10357 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010358 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010359 return(NULL);
10360 }
10361 NEXT;
10362 SKIP_BLANKS;
10363 if (RAW == '"') {
10364 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010365 encoding = xmlParseEncName(ctxt);
10366 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010367 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010368 xmlFree((xmlChar *) encoding);
10369 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010370 } else
10371 NEXT;
10372 } else if (RAW == '\''){
10373 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010374 encoding = xmlParseEncName(ctxt);
10375 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010376 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010377 xmlFree((xmlChar *) encoding);
10378 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010379 } else
10380 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010381 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010382 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010383 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010384
10385 /*
10386 * Non standard parsing, allowing the user to ignore encoding
10387 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010388 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10389 xmlFree((xmlChar *) encoding);
10390 return(NULL);
10391 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010392
Daniel Veillard6b621b82003-08-11 15:03:34 +000010393 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010394 * UTF-16 encoding switch has already taken place at this stage,
Daniel Veillard6b621b82003-08-11 15:03:34 +000010395 * more over the little-endian/big-endian selection is already done
10396 */
10397 if ((encoding != NULL) &&
10398 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10399 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010400 /*
10401 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010402 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010403 * document is apparently UTF-8 compatible, then raise an
10404 * encoding mismatch fatal error
10405 */
10406 if ((ctxt->encoding == NULL) &&
10407 (ctxt->input->buf != NULL) &&
10408 (ctxt->input->buf->encoder == NULL)) {
10409 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10410 "Document labelled UTF-16 but has UTF-8 content\n");
10411 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010412 if (ctxt->encoding != NULL)
10413 xmlFree((xmlChar *) ctxt->encoding);
10414 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010415 }
10416 /*
10417 * UTF-8 encoding is handled natively
10418 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010419 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010420 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10421 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010422 if (ctxt->encoding != NULL)
10423 xmlFree((xmlChar *) ctxt->encoding);
10424 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010425 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010426 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010427 xmlCharEncodingHandlerPtr handler;
10428
10429 if (ctxt->input->encoding != NULL)
10430 xmlFree((xmlChar *) ctxt->input->encoding);
10431 ctxt->input->encoding = encoding;
10432
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010433 handler = xmlFindCharEncodingHandler((const char *) encoding);
10434 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010435 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10436 /* failed to convert */
10437 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10438 return(NULL);
10439 }
Owen Taylor3473f882001-02-23 17:55:21 +000010440 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010441 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010442 "Unsupported encoding %s\n", encoding);
10443 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010444 }
10445 }
10446 }
10447 return(encoding);
10448}
10449
10450/**
10451 * xmlParseSDDecl:
10452 * @ctxt: an XML parser context
10453 *
10454 * parse the XML standalone declaration
10455 *
10456 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010457 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010458 *
10459 * [ VC: Standalone Document Declaration ]
10460 * TODO The standalone document declaration must have the value "no"
10461 * if any external markup declarations contain declarations of:
10462 * - attributes with default values, if elements to which these
10463 * attributes apply appear in the document without specifications
10464 * of values for these attributes, or
10465 * - entities (other than amp, lt, gt, apos, quot), if references
10466 * to those entities appear in the document, or
10467 * - attributes with values subject to normalization, where the
10468 * attribute appears in the document with a value which will change
10469 * as a result of normalization, or
10470 * - element types with element content, if white space occurs directly
10471 * within any instance of those types.
10472 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010473 * Returns:
10474 * 1 if standalone="yes"
10475 * 0 if standalone="no"
10476 * -2 if standalone attribute is missing or invalid
10477 * (A standalone value of -2 means that the XML declaration was found,
10478 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010479 */
10480
10481int
10482xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010483 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010484
10485 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010486 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010487 SKIP(10);
10488 SKIP_BLANKS;
10489 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010490 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010491 return(standalone);
10492 }
10493 NEXT;
10494 SKIP_BLANKS;
10495 if (RAW == '\''){
10496 NEXT;
10497 if ((RAW == 'n') && (NXT(1) == 'o')) {
10498 standalone = 0;
10499 SKIP(2);
10500 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10501 (NXT(2) == 's')) {
10502 standalone = 1;
10503 SKIP(3);
10504 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010505 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010506 }
10507 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010508 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010509 } else
10510 NEXT;
10511 } else if (RAW == '"'){
10512 NEXT;
10513 if ((RAW == 'n') && (NXT(1) == 'o')) {
10514 standalone = 0;
10515 SKIP(2);
10516 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10517 (NXT(2) == 's')) {
10518 standalone = 1;
10519 SKIP(3);
10520 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010521 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010522 }
10523 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010524 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010525 } else
10526 NEXT;
10527 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010528 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010529 }
10530 }
10531 return(standalone);
10532}
10533
10534/**
10535 * xmlParseXMLDecl:
10536 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010537 *
Owen Taylor3473f882001-02-23 17:55:21 +000010538 * parse an XML declaration header
10539 *
10540 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10541 */
10542
10543void
10544xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10545 xmlChar *version;
10546
10547 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010548 * This value for standalone indicates that the document has an
10549 * XML declaration but it does not have a standalone attribute.
10550 * It will be overwritten later if a standalone attribute is found.
10551 */
10552 ctxt->input->standalone = -2;
10553
10554 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010555 * We know that '<?xml' is here.
10556 */
10557 SKIP(5);
10558
William M. Brack76e95df2003-10-18 16:20:14 +000010559 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010560 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10561 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010562 }
10563 SKIP_BLANKS;
10564
10565 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010566 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010567 */
10568 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010569 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010570 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010571 } else {
10572 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10573 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010574 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010575 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010576 if (ctxt->options & XML_PARSE_OLD10) {
10577 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10578 "Unsupported version '%s'\n",
10579 version);
10580 } else {
10581 if ((version[0] == '1') && ((version[1] == '.'))) {
10582 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10583 "Unsupported version '%s'\n",
10584 version, NULL);
10585 } else {
10586 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10587 "Unsupported version '%s'\n",
10588 version);
10589 }
10590 }
Daniel Veillard19840942001-11-29 16:11:38 +000010591 }
10592 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010593 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010594 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010595 }
Owen Taylor3473f882001-02-23 17:55:21 +000010596
10597 /*
10598 * We may have the encoding declaration
10599 */
William M. Brack76e95df2003-10-18 16:20:14 +000010600 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010601 if ((RAW == '?') && (NXT(1) == '>')) {
10602 SKIP(2);
10603 return;
10604 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010605 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010606 }
10607 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010608 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10609 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010610 /*
10611 * The XML REC instructs us to stop parsing right here
10612 */
10613 return;
10614 }
10615
10616 /*
10617 * We may have the standalone status.
10618 */
William M. Brack76e95df2003-10-18 16:20:14 +000010619 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010620 if ((RAW == '?') && (NXT(1) == '>')) {
10621 SKIP(2);
10622 return;
10623 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010624 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010625 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010626
10627 /*
10628 * We can grow the input buffer freely at that point
10629 */
10630 GROW;
10631
Owen Taylor3473f882001-02-23 17:55:21 +000010632 SKIP_BLANKS;
10633 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10634
10635 SKIP_BLANKS;
10636 if ((RAW == '?') && (NXT(1) == '>')) {
10637 SKIP(2);
10638 } else if (RAW == '>') {
10639 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010640 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010641 NEXT;
10642 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010643 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010644 MOVETO_ENDTAG(CUR_PTR);
10645 NEXT;
10646 }
10647}
10648
10649/**
10650 * xmlParseMisc:
10651 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010652 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010653 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010654 *
10655 * [27] Misc ::= Comment | PI | S
10656 */
10657
10658void
10659xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010660 while ((ctxt->instate != XML_PARSER_EOF) &&
10661 (((RAW == '<') && (NXT(1) == '?')) ||
10662 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10663 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010664 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010665 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010666 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010667 NEXT;
10668 } else
10669 xmlParseComment(ctxt);
10670 }
10671}
10672
10673/**
10674 * xmlParseDocument:
10675 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010676 *
Owen Taylor3473f882001-02-23 17:55:21 +000010677 * parse an XML document (and build a tree if using the standard SAX
10678 * interface).
10679 *
10680 * [1] document ::= prolog element Misc*
10681 *
10682 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10683 *
10684 * Returns 0, -1 in case of error. the parser context is augmented
10685 * as a result of the parsing.
10686 */
10687
10688int
10689xmlParseDocument(xmlParserCtxtPtr ctxt) {
10690 xmlChar start[4];
10691 xmlCharEncoding enc;
10692
10693 xmlInitParser();
10694
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010695 if ((ctxt == NULL) || (ctxt->input == NULL))
10696 return(-1);
10697
Owen Taylor3473f882001-02-23 17:55:21 +000010698 GROW;
10699
10700 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010701 * SAX: detecting the level.
10702 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010703 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010704
10705 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010706 * SAX: beginning of the document processing.
10707 */
10708 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10709 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010710 if (ctxt->instate == XML_PARSER_EOF)
10711 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010712
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010713 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010714 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010715 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010716 * Get the 4 first bytes and decode the charset
10717 * if enc != XML_CHAR_ENCODING_NONE
10718 * plug some encoding conversion routines.
10719 */
10720 start[0] = RAW;
10721 start[1] = NXT(1);
10722 start[2] = NXT(2);
10723 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010724 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010725 if (enc != XML_CHAR_ENCODING_NONE) {
10726 xmlSwitchEncoding(ctxt, enc);
10727 }
Owen Taylor3473f882001-02-23 17:55:21 +000010728 }
10729
10730
10731 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010732 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010733 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010734 }
10735
10736 /*
10737 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010738 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010739 * than just the first line, unless the amount of data is really
10740 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010741 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010742 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10743 GROW;
10744 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010745 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010746
10747 /*
10748 * Note that we will switch encoding on the fly.
10749 */
10750 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010751 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10752 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010753 /*
10754 * The XML REC instructs us to stop parsing right here
10755 */
10756 return(-1);
10757 }
10758 ctxt->standalone = ctxt->input->standalone;
10759 SKIP_BLANKS;
10760 } else {
10761 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10762 }
10763 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10764 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010765 if (ctxt->instate == XML_PARSER_EOF)
10766 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010767 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10768 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10769 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10770 }
Owen Taylor3473f882001-02-23 17:55:21 +000010771
10772 /*
10773 * The Misc part of the Prolog
10774 */
10775 GROW;
10776 xmlParseMisc(ctxt);
10777
10778 /*
10779 * Then possibly doc type declaration(s) and more Misc
10780 * (doctypedecl Misc*)?
10781 */
10782 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010783 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010784
10785 ctxt->inSubset = 1;
10786 xmlParseDocTypeDecl(ctxt);
10787 if (RAW == '[') {
10788 ctxt->instate = XML_PARSER_DTD;
10789 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010790 if (ctxt->instate == XML_PARSER_EOF)
10791 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010792 }
10793
10794 /*
10795 * Create and update the external subset.
10796 */
10797 ctxt->inSubset = 2;
10798 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10799 (!ctxt->disableSAX))
10800 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10801 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010802 if (ctxt->instate == XML_PARSER_EOF)
10803 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010804 ctxt->inSubset = 0;
10805
Daniel Veillardac4118d2008-01-11 05:27:32 +000010806 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010807
10808 ctxt->instate = XML_PARSER_PROLOG;
10809 xmlParseMisc(ctxt);
10810 }
10811
10812 /*
10813 * Time to start parsing the tree itself
10814 */
10815 GROW;
10816 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010817 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10818 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010819 } else {
10820 ctxt->instate = XML_PARSER_CONTENT;
10821 xmlParseElement(ctxt);
10822 ctxt->instate = XML_PARSER_EPILOG;
10823
10824
10825 /*
10826 * The Misc part at the end
10827 */
10828 xmlParseMisc(ctxt);
10829
Daniel Veillard561b7f82002-03-20 21:55:57 +000010830 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010831 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010832 }
10833 ctxt->instate = XML_PARSER_EOF;
10834 }
10835
10836 /*
10837 * SAX: end of the document processing.
10838 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010839 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010840 ctxt->sax->endDocument(ctxt->userData);
10841
Daniel Veillard5997aca2002-03-18 18:36:20 +000010842 /*
10843 * Remove locally kept entity definitions if the tree was not built
10844 */
10845 if ((ctxt->myDoc != NULL) &&
10846 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10847 xmlFreeDoc(ctxt->myDoc);
10848 ctxt->myDoc = NULL;
10849 }
10850
Daniel Veillardae0765b2008-07-31 19:54:59 +000010851 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10852 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10853 if (ctxt->valid)
10854 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10855 if (ctxt->nsWellFormed)
10856 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10857 if (ctxt->options & XML_PARSE_OLD10)
10858 ctxt->myDoc->properties |= XML_DOC_OLD10;
10859 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010860 if (! ctxt->wellFormed) {
10861 ctxt->valid = 0;
10862 return(-1);
10863 }
Owen Taylor3473f882001-02-23 17:55:21 +000010864 return(0);
10865}
10866
10867/**
10868 * xmlParseExtParsedEnt:
10869 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010870 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010871 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010872 * An external general parsed entity is well-formed if it matches the
10873 * production labeled extParsedEnt.
10874 *
10875 * [78] extParsedEnt ::= TextDecl? content
10876 *
10877 * Returns 0, -1 in case of error. the parser context is augmented
10878 * as a result of the parsing.
10879 */
10880
10881int
10882xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10883 xmlChar start[4];
10884 xmlCharEncoding enc;
10885
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010886 if ((ctxt == NULL) || (ctxt->input == NULL))
10887 return(-1);
10888
Owen Taylor3473f882001-02-23 17:55:21 +000010889 xmlDefaultSAXHandlerInit();
10890
Daniel Veillard309f81d2003-09-23 09:02:53 +000010891 xmlDetectSAX2(ctxt);
10892
Owen Taylor3473f882001-02-23 17:55:21 +000010893 GROW;
10894
10895 /*
10896 * SAX: beginning of the document processing.
10897 */
10898 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10899 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10900
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010901 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010902 * Get the 4 first bytes and decode the charset
10903 * if enc != XML_CHAR_ENCODING_NONE
10904 * plug some encoding conversion routines.
10905 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010906 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10907 start[0] = RAW;
10908 start[1] = NXT(1);
10909 start[2] = NXT(2);
10910 start[3] = NXT(3);
10911 enc = xmlDetectCharEncoding(start, 4);
10912 if (enc != XML_CHAR_ENCODING_NONE) {
10913 xmlSwitchEncoding(ctxt, enc);
10914 }
Owen Taylor3473f882001-02-23 17:55:21 +000010915 }
10916
10917
10918 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010919 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010920 }
10921
10922 /*
10923 * Check for the XMLDecl in the Prolog.
10924 */
10925 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010926 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010927
10928 /*
10929 * Note that we will switch encoding on the fly.
10930 */
10931 xmlParseXMLDecl(ctxt);
10932 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10933 /*
10934 * The XML REC instructs us to stop parsing right here
10935 */
10936 return(-1);
10937 }
10938 SKIP_BLANKS;
10939 } else {
10940 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10941 }
10942 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10943 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010944 if (ctxt->instate == XML_PARSER_EOF)
10945 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010946
10947 /*
10948 * Doing validity checking on chunk doesn't make sense
10949 */
10950 ctxt->instate = XML_PARSER_CONTENT;
10951 ctxt->validate = 0;
10952 ctxt->loadsubset = 0;
10953 ctxt->depth = 0;
10954
10955 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010956 if (ctxt->instate == XML_PARSER_EOF)
10957 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010958
Owen Taylor3473f882001-02-23 17:55:21 +000010959 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010960 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010961 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010962 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010963 }
10964
10965 /*
10966 * SAX: end of the document processing.
10967 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010968 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010969 ctxt->sax->endDocument(ctxt->userData);
10970
10971 if (! ctxt->wellFormed) return(-1);
10972 return(0);
10973}
10974
Daniel Veillard73b013f2003-09-30 12:36:01 +000010975#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010976/************************************************************************
10977 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010978 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010979 * *
10980 ************************************************************************/
10981
10982/**
10983 * xmlParseLookupSequence:
10984 * @ctxt: an XML parser context
10985 * @first: the first char to lookup
10986 * @next: the next char to lookup or zero
10987 * @third: the next char to lookup or zero
10988 *
10989 * Try to find if a sequence (first, next, third) or just (first next) or
10990 * (first) is available in the input stream.
10991 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10992 * to avoid rescanning sequences of bytes, it DOES change the state of the
10993 * parser, do not use liberally.
10994 *
10995 * Returns the index to the current parsing point if the full sequence
10996 * is available, -1 otherwise.
10997 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010998static int
Owen Taylor3473f882001-02-23 17:55:21 +000010999xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11000 xmlChar next, xmlChar third) {
11001 int base, len;
11002 xmlParserInputPtr in;
11003 const xmlChar *buf;
11004
11005 in = ctxt->input;
11006 if (in == NULL) return(-1);
11007 base = in->cur - in->base;
11008 if (base < 0) return(-1);
11009 if (ctxt->checkIndex > base)
11010 base = ctxt->checkIndex;
11011 if (in->buf == NULL) {
11012 buf = in->base;
11013 len = in->length;
11014 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011015 buf = xmlBufContent(in->buf->buffer);
11016 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000011017 }
11018 /* take into account the sequence length */
11019 if (third) len -= 2;
11020 else if (next) len --;
11021 for (;base < len;base++) {
11022 if (buf[base] == first) {
11023 if (third != 0) {
11024 if ((buf[base + 1] != next) ||
11025 (buf[base + 2] != third)) continue;
11026 } else if (next != 0) {
11027 if (buf[base + 1] != next) continue;
11028 }
11029 ctxt->checkIndex = 0;
11030#ifdef DEBUG_PUSH
11031 if (next == 0)
11032 xmlGenericError(xmlGenericErrorContext,
11033 "PP: lookup '%c' found at %d\n",
11034 first, base);
11035 else if (third == 0)
11036 xmlGenericError(xmlGenericErrorContext,
11037 "PP: lookup '%c%c' found at %d\n",
11038 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011039 else
Owen Taylor3473f882001-02-23 17:55:21 +000011040 xmlGenericError(xmlGenericErrorContext,
11041 "PP: lookup '%c%c%c' found at %d\n",
11042 first, next, third, base);
11043#endif
11044 return(base - (in->cur - in->base));
11045 }
11046 }
11047 ctxt->checkIndex = base;
11048#ifdef DEBUG_PUSH
11049 if (next == 0)
11050 xmlGenericError(xmlGenericErrorContext,
11051 "PP: lookup '%c' failed\n", first);
11052 else if (third == 0)
11053 xmlGenericError(xmlGenericErrorContext,
11054 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011055 else
Owen Taylor3473f882001-02-23 17:55:21 +000011056 xmlGenericError(xmlGenericErrorContext,
11057 "PP: lookup '%c%c%c' failed\n", first, next, third);
11058#endif
11059 return(-1);
11060}
11061
11062/**
Daniel Veillarda880b122003-04-21 21:36:41 +000011063 * xmlParseGetLasts:
11064 * @ctxt: an XML parser context
11065 * @lastlt: pointer to store the last '<' from the input
11066 * @lastgt: pointer to store the last '>' from the input
11067 *
11068 * Lookup the last < and > in the current chunk
11069 */
11070static void
11071xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11072 const xmlChar **lastgt) {
11073 const xmlChar *tmp;
11074
11075 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11076 xmlGenericError(xmlGenericErrorContext,
11077 "Internal error: xmlParseGetLasts\n");
11078 return;
11079 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011080 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011081 tmp = ctxt->input->end;
11082 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000011083 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000011084 if (tmp < ctxt->input->base) {
11085 *lastlt = NULL;
11086 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011087 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011088 *lastlt = tmp;
11089 tmp++;
11090 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11091 if (*tmp == '\'') {
11092 tmp++;
11093 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11094 if (tmp < ctxt->input->end) tmp++;
11095 } else if (*tmp == '"') {
11096 tmp++;
11097 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11098 if (tmp < ctxt->input->end) tmp++;
11099 } else
11100 tmp++;
11101 }
11102 if (tmp < ctxt->input->end)
11103 *lastgt = tmp;
11104 else {
11105 tmp = *lastlt;
11106 tmp--;
11107 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11108 if (tmp >= ctxt->input->base)
11109 *lastgt = tmp;
11110 else
11111 *lastgt = NULL;
11112 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011113 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011114 } else {
11115 *lastlt = NULL;
11116 *lastgt = NULL;
11117 }
11118}
11119/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011120 * xmlCheckCdataPush:
David Kilzer4f8606c2016-01-05 13:38:09 -080011121 * @cur: pointer to the block of characters
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011122 * @len: length of the block in bytes
David Kilzer4f8606c2016-01-05 13:38:09 -080011123 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011124 *
11125 * Check that the block of characters is okay as SCdata content [20]
11126 *
11127 * Returns the number of bytes to pass if okay, a negative index where an
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +020011128 * UTF-8 error occurred otherwise
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011129 */
11130static int
David Kilzer4f8606c2016-01-05 13:38:09 -080011131xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011132 int ix;
11133 unsigned char c;
11134 int codepoint;
11135
11136 if ((utf == NULL) || (len <= 0))
11137 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011138
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011139 for (ix = 0; ix < len;) { /* string is 0-terminated */
11140 c = utf[ix];
11141 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11142 if (c >= 0x20)
11143 ix++;
11144 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11145 ix++;
11146 else
11147 return(-ix);
11148 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011149 if (ix + 2 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011150 if ((utf[ix+1] & 0xc0 ) != 0x80)
11151 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011152 codepoint = (utf[ix] & 0x1f) << 6;
11153 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011154 if (!xmlIsCharQ(codepoint))
11155 return(-ix);
11156 ix += 2;
11157 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011158 if (ix + 3 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011159 if (((utf[ix+1] & 0xc0) != 0x80) ||
11160 ((utf[ix+2] & 0xc0) != 0x80))
11161 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011162 codepoint = (utf[ix] & 0xf) << 12;
11163 codepoint |= (utf[ix+1] & 0x3f) << 6;
11164 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011165 if (!xmlIsCharQ(codepoint))
11166 return(-ix);
11167 ix += 3;
11168 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011169 if (ix + 4 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011170 if (((utf[ix+1] & 0xc0) != 0x80) ||
11171 ((utf[ix+2] & 0xc0) != 0x80) ||
11172 ((utf[ix+3] & 0xc0) != 0x80))
11173 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011174 codepoint = (utf[ix] & 0x7) << 18;
11175 codepoint |= (utf[ix+1] & 0x3f) << 12;
11176 codepoint |= (utf[ix+2] & 0x3f) << 6;
11177 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011178 if (!xmlIsCharQ(codepoint))
11179 return(-ix);
11180 ix += 4;
11181 } else /* unknown encoding */
11182 return(-ix);
11183 }
11184 return(ix);
11185}
11186
11187/**
Owen Taylor3473f882001-02-23 17:55:21 +000011188 * xmlParseTryOrFinish:
11189 * @ctxt: an XML parser context
11190 * @terminate: last chunk indicator
11191 *
11192 * Try to progress on parsing
11193 *
11194 * Returns zero if no parsing was possible
11195 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011196static int
Owen Taylor3473f882001-02-23 17:55:21 +000011197xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11198 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011199 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011200 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011201 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011202
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011203 if (ctxt->input == NULL)
11204 return(0);
11205
Owen Taylor3473f882001-02-23 17:55:21 +000011206#ifdef DEBUG_PUSH
11207 switch (ctxt->instate) {
11208 case XML_PARSER_EOF:
11209 xmlGenericError(xmlGenericErrorContext,
11210 "PP: try EOF\n"); break;
11211 case XML_PARSER_START:
11212 xmlGenericError(xmlGenericErrorContext,
11213 "PP: try START\n"); break;
11214 case XML_PARSER_MISC:
11215 xmlGenericError(xmlGenericErrorContext,
11216 "PP: try MISC\n");break;
11217 case XML_PARSER_COMMENT:
11218 xmlGenericError(xmlGenericErrorContext,
11219 "PP: try COMMENT\n");break;
11220 case XML_PARSER_PROLOG:
11221 xmlGenericError(xmlGenericErrorContext,
11222 "PP: try PROLOG\n");break;
11223 case XML_PARSER_START_TAG:
11224 xmlGenericError(xmlGenericErrorContext,
11225 "PP: try START_TAG\n");break;
11226 case XML_PARSER_CONTENT:
11227 xmlGenericError(xmlGenericErrorContext,
11228 "PP: try CONTENT\n");break;
11229 case XML_PARSER_CDATA_SECTION:
11230 xmlGenericError(xmlGenericErrorContext,
11231 "PP: try CDATA_SECTION\n");break;
11232 case XML_PARSER_END_TAG:
11233 xmlGenericError(xmlGenericErrorContext,
11234 "PP: try END_TAG\n");break;
11235 case XML_PARSER_ENTITY_DECL:
11236 xmlGenericError(xmlGenericErrorContext,
11237 "PP: try ENTITY_DECL\n");break;
11238 case XML_PARSER_ENTITY_VALUE:
11239 xmlGenericError(xmlGenericErrorContext,
11240 "PP: try ENTITY_VALUE\n");break;
11241 case XML_PARSER_ATTRIBUTE_VALUE:
11242 xmlGenericError(xmlGenericErrorContext,
11243 "PP: try ATTRIBUTE_VALUE\n");break;
11244 case XML_PARSER_DTD:
11245 xmlGenericError(xmlGenericErrorContext,
11246 "PP: try DTD\n");break;
11247 case XML_PARSER_EPILOG:
11248 xmlGenericError(xmlGenericErrorContext,
11249 "PP: try EPILOG\n");break;
11250 case XML_PARSER_PI:
11251 xmlGenericError(xmlGenericErrorContext,
11252 "PP: try PI\n");break;
11253 case XML_PARSER_IGNORE:
11254 xmlGenericError(xmlGenericErrorContext,
11255 "PP: try IGNORE\n");break;
11256 }
11257#endif
11258
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011259 if ((ctxt->input != NULL) &&
11260 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011261 xmlSHRINK(ctxt);
11262 ctxt->checkIndex = 0;
11263 }
11264 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011265
Daniel Veillarde50ba812013-04-11 15:54:51 +080011266 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011267 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011268 return(0);
11269
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011270 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011271 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011272 avail = ctxt->input->length -
11273 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011274 else {
11275 /*
11276 * If we are operating on converted input, try to flush
Haibo Huangcfd91dc2020-07-30 23:01:33 -070011277 * remaining chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011278 * buffer. But do not do this in document start where
11279 * encoding="..." may not have been read and we work on a
11280 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011281 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011282 if ((ctxt->instate != XML_PARSER_START) &&
11283 (ctxt->input->buf->raw != NULL) &&
11284 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011285 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11286 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011287 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011288
11289 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011290 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11291 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011292 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011293 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011294 (ctxt->input->cur - ctxt->input->base);
11295 }
Owen Taylor3473f882001-02-23 17:55:21 +000011296 if (avail < 1)
11297 goto done;
11298 switch (ctxt->instate) {
11299 case XML_PARSER_EOF:
11300 /*
11301 * Document parsing is done !
11302 */
11303 goto done;
11304 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011305 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11306 xmlChar start[4];
11307 xmlCharEncoding enc;
11308
11309 /*
11310 * Very first chars read from the document flow.
11311 */
11312 if (avail < 4)
11313 goto done;
11314
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011315 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011316 * Get the 4 first bytes and decode the charset
11317 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011318 * plug some encoding conversion routines,
11319 * else xmlSwitchEncoding will set to (default)
11320 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011321 */
11322 start[0] = RAW;
11323 start[1] = NXT(1);
11324 start[2] = NXT(2);
11325 start[3] = NXT(3);
11326 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011327 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011328 break;
11329 }
Owen Taylor3473f882001-02-23 17:55:21 +000011330
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011331 if (avail < 2)
11332 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011333 cur = ctxt->input->cur[0];
11334 next = ctxt->input->cur[1];
11335 if (cur == 0) {
11336 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11337 ctxt->sax->setDocumentLocator(ctxt->userData,
11338 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011339 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011340 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011341#ifdef DEBUG_PUSH
11342 xmlGenericError(xmlGenericErrorContext,
11343 "PP: entering EOF\n");
11344#endif
11345 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11346 ctxt->sax->endDocument(ctxt->userData);
11347 goto done;
11348 }
11349 if ((cur == '<') && (next == '?')) {
11350 /* PI or XML decl */
11351 if (avail < 5) return(ret);
11352 if ((!terminate) &&
11353 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11354 return(ret);
11355 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11356 ctxt->sax->setDocumentLocator(ctxt->userData,
11357 &xmlDefaultSAXLocator);
11358 if ((ctxt->input->cur[2] == 'x') &&
11359 (ctxt->input->cur[3] == 'm') &&
11360 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011361 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011362 ret += 5;
11363#ifdef DEBUG_PUSH
11364 xmlGenericError(xmlGenericErrorContext,
11365 "PP: Parsing XML Decl\n");
11366#endif
11367 xmlParseXMLDecl(ctxt);
11368 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11369 /*
11370 * The XML REC instructs us to stop parsing right
11371 * here
11372 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011373 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011374 return(0);
11375 }
11376 ctxt->standalone = ctxt->input->standalone;
11377 if ((ctxt->encoding == NULL) &&
11378 (ctxt->input->encoding != NULL))
11379 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11380 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11381 (!ctxt->disableSAX))
11382 ctxt->sax->startDocument(ctxt->userData);
11383 ctxt->instate = XML_PARSER_MISC;
11384#ifdef DEBUG_PUSH
11385 xmlGenericError(xmlGenericErrorContext,
11386 "PP: entering MISC\n");
11387#endif
11388 } else {
11389 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11390 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11391 (!ctxt->disableSAX))
11392 ctxt->sax->startDocument(ctxt->userData);
11393 ctxt->instate = XML_PARSER_MISC;
11394#ifdef DEBUG_PUSH
11395 xmlGenericError(xmlGenericErrorContext,
11396 "PP: entering MISC\n");
11397#endif
11398 }
11399 } else {
11400 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11401 ctxt->sax->setDocumentLocator(ctxt->userData,
11402 &xmlDefaultSAXLocator);
11403 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011404 if (ctxt->version == NULL) {
11405 xmlErrMemory(ctxt, NULL);
11406 break;
11407 }
Owen Taylor3473f882001-02-23 17:55:21 +000011408 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11409 (!ctxt->disableSAX))
11410 ctxt->sax->startDocument(ctxt->userData);
11411 ctxt->instate = XML_PARSER_MISC;
11412#ifdef DEBUG_PUSH
11413 xmlGenericError(xmlGenericErrorContext,
11414 "PP: entering MISC\n");
11415#endif
11416 }
11417 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011418 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011419 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011420 const xmlChar *prefix = NULL;
11421 const xmlChar *URI = NULL;
Elliott Hughese54f00d2021-05-13 08:13:46 -070011422 int line = ctxt->input->line;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011423 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011424
11425 if ((avail < 2) && (ctxt->inputNr == 1))
11426 goto done;
11427 cur = ctxt->input->cur[0];
11428 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011429 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011430 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011431 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11432 ctxt->sax->endDocument(ctxt->userData);
11433 goto done;
11434 }
11435 if (!terminate) {
11436 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011437 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011438 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011439 goto done;
11440 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11441 goto done;
11442 }
11443 }
11444 if (ctxt->spaceNr == 0)
11445 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011446 else if (*ctxt->space == -2)
11447 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011448 else
11449 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011450#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011451 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011452#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011453 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011454#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011455 else
11456 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011457#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011458 if (ctxt->instate == XML_PARSER_EOF)
11459 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011460 if (name == NULL) {
11461 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011462 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011463 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11464 ctxt->sax->endDocument(ctxt->userData);
11465 goto done;
11466 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011467#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011468 /*
11469 * [ VC: Root Element Type ]
11470 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011471 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011472 */
11473 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11474 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11475 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011476#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011477
11478 /*
11479 * Check for an Empty Element.
11480 */
11481 if ((RAW == '/') && (NXT(1) == '>')) {
11482 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011483
11484 if (ctxt->sax2) {
11485 if ((ctxt->sax != NULL) &&
11486 (ctxt->sax->endElementNs != NULL) &&
11487 (!ctxt->disableSAX))
11488 ctxt->sax->endElementNs(ctxt->userData, name,
11489 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011490 if (ctxt->nsNr - nsNr > 0)
11491 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011492#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011493 } else {
11494 if ((ctxt->sax != NULL) &&
11495 (ctxt->sax->endElement != NULL) &&
11496 (!ctxt->disableSAX))
11497 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011498#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011499 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011500 if (ctxt->instate == XML_PARSER_EOF)
11501 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011502 spacePop(ctxt);
11503 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011504 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011505 } else {
11506 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011507 }
Daniel Veillard65686452012-07-19 18:25:01 +080011508 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011509 break;
11510 }
11511 if (RAW == '>') {
11512 NEXT;
11513 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011514 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011515 "Couldn't find end of Start Tag %s\n",
11516 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011517 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011518 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011519 }
Elliott Hughese54f00d2021-05-13 08:13:46 -070011520 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011521
Daniel Veillarda880b122003-04-21 21:36:41 +000011522 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011523 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011524 break;
11525 }
11526 case XML_PARSER_CONTENT: {
11527 const xmlChar *test;
11528 unsigned int cons;
11529 if ((avail < 2) && (ctxt->inputNr == 1))
11530 goto done;
11531 cur = ctxt->input->cur[0];
11532 next = ctxt->input->cur[1];
11533
11534 test = CUR_PTR;
11535 cons = ctxt->input->consumed;
11536 if ((cur == '<') && (next == '/')) {
11537 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011538 break;
11539 } else if ((cur == '<') && (next == '?')) {
11540 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011541 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11542 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011543 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011544 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011545 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011546 ctxt->instate = XML_PARSER_CONTENT;
11547 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011548 } else if ((cur == '<') && (next != '!')) {
11549 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011550 break;
11551 } else if ((cur == '<') && (next == '!') &&
11552 (ctxt->input->cur[2] == '-') &&
11553 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011554 int term;
11555
11556 if (avail < 4)
11557 goto done;
11558 ctxt->input->cur += 4;
11559 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11560 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011561 if ((!terminate) && (term < 0)) {
11562 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011563 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011564 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011565 xmlParseComment(ctxt);
11566 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011567 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011568 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11569 (ctxt->input->cur[2] == '[') &&
11570 (ctxt->input->cur[3] == 'C') &&
11571 (ctxt->input->cur[4] == 'D') &&
11572 (ctxt->input->cur[5] == 'A') &&
11573 (ctxt->input->cur[6] == 'T') &&
11574 (ctxt->input->cur[7] == 'A') &&
11575 (ctxt->input->cur[8] == '[')) {
11576 SKIP(9);
11577 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011578 break;
11579 } else if ((cur == '<') && (next == '!') &&
11580 (avail < 9)) {
11581 goto done;
11582 } else if (cur == '&') {
11583 if ((!terminate) &&
11584 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11585 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011586 xmlParseReference(ctxt);
11587 } else {
11588 /* TODO Avoid the extra copy, handle directly !!! */
11589 /*
11590 * Goal of the following test is:
11591 * - minimize calls to the SAX 'character' callback
11592 * when they are mergeable
11593 * - handle a problem for isBlank when we only parse
11594 * a sequence of blank chars and the next one is
11595 * not available to check against '<' presence.
11596 * - tries to homogenize the differences in SAX
11597 * callbacks between the push and pull versions
11598 * of the parser.
11599 */
11600 if ((ctxt->inputNr == 1) &&
11601 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11602 if (!terminate) {
11603 if (ctxt->progressive) {
11604 if ((lastlt == NULL) ||
11605 (ctxt->input->cur > lastlt))
11606 goto done;
11607 } else if (xmlParseLookupSequence(ctxt,
11608 '<', 0, 0) < 0) {
11609 goto done;
11610 }
11611 }
11612 }
11613 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011614 xmlParseCharData(ctxt, 0);
11615 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011616 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011617 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11618 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011619 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011620 break;
11621 }
11622 break;
11623 }
11624 case XML_PARSER_END_TAG:
11625 if (avail < 2)
11626 goto done;
11627 if (!terminate) {
11628 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011629 /* > can be found unescaped in attribute values */
11630 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011631 goto done;
11632 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11633 goto done;
11634 }
11635 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011636 if (ctxt->sax2) {
Elliott Hughese54f00d2021-05-13 08:13:46 -070011637 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011638 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011639 }
11640#ifdef LIBXML_SAX1_ENABLED
11641 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011642 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011643#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011644 if (ctxt->instate == XML_PARSER_EOF) {
11645 /* Nothing */
11646 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011647 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011648 } else {
11649 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011650 }
11651 break;
11652 case XML_PARSER_CDATA_SECTION: {
11653 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011654 * In push mode, the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011655 * cdataBlock needs to merge back contiguous callbacks.
11656 */
11657 int base;
11658
11659 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11660 if (base < 0) {
11661 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011662 int tmp;
11663
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011664 tmp = xmlCheckCdataPush(ctxt->input->cur,
David Kilzer4f8606c2016-01-05 13:38:09 -080011665 XML_PARSER_BIG_BUFFER_SIZE, 0);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011666 if (tmp < 0) {
11667 tmp = -tmp;
11668 ctxt->input->cur += tmp;
11669 goto encoding_error;
11670 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011671 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11672 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011673 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011674 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011675 else if (ctxt->sax->characters != NULL)
11676 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011677 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011678 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011679 if (ctxt->instate == XML_PARSER_EOF)
11680 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011681 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011682 ctxt->checkIndex = 0;
11683 }
11684 goto done;
11685 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011686 int tmp;
11687
David Kilzer4f8606c2016-01-05 13:38:09 -080011688 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011689 if ((tmp < 0) || (tmp != base)) {
11690 tmp = -tmp;
11691 ctxt->input->cur += tmp;
11692 goto encoding_error;
11693 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011694 if ((ctxt->sax != NULL) && (base == 0) &&
11695 (ctxt->sax->cdataBlock != NULL) &&
11696 (!ctxt->disableSAX)) {
11697 /*
11698 * Special case to provide identical behaviour
11699 * between pull and push parsers on empty CDATA
11700 * sections
11701 */
11702 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11703 (!strncmp((const char *)&ctxt->input->cur[-9],
11704 "<![CDATA[", 9)))
11705 ctxt->sax->cdataBlock(ctxt->userData,
11706 BAD_CAST "", 0);
11707 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011708 (!ctxt->disableSAX)) {
11709 if (ctxt->sax->cdataBlock != NULL)
11710 ctxt->sax->cdataBlock(ctxt->userData,
11711 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011712 else if (ctxt->sax->characters != NULL)
11713 ctxt->sax->characters(ctxt->userData,
11714 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011715 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011716 if (ctxt->instate == XML_PARSER_EOF)
11717 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011718 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011719 ctxt->checkIndex = 0;
11720 ctxt->instate = XML_PARSER_CONTENT;
11721#ifdef DEBUG_PUSH
11722 xmlGenericError(xmlGenericErrorContext,
11723 "PP: entering CONTENT\n");
11724#endif
11725 }
11726 break;
11727 }
Owen Taylor3473f882001-02-23 17:55:21 +000011728 case XML_PARSER_MISC:
11729 SKIP_BLANKS;
11730 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011731 avail = ctxt->input->length -
11732 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011733 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011734 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011735 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011736 if (avail < 2)
11737 goto done;
11738 cur = ctxt->input->cur[0];
11739 next = ctxt->input->cur[1];
11740 if ((cur == '<') && (next == '?')) {
11741 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011742 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11743 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011744 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011745 }
Owen Taylor3473f882001-02-23 17:55:21 +000011746#ifdef DEBUG_PUSH
11747 xmlGenericError(xmlGenericErrorContext,
11748 "PP: Parsing PI\n");
11749#endif
11750 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011751 if (ctxt->instate == XML_PARSER_EOF)
11752 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011753 ctxt->instate = XML_PARSER_MISC;
11754 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011755 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011756 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011757 (ctxt->input->cur[2] == '-') &&
11758 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011759 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011760 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11761 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011762 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011763 }
Owen Taylor3473f882001-02-23 17:55:21 +000011764#ifdef DEBUG_PUSH
11765 xmlGenericError(xmlGenericErrorContext,
11766 "PP: Parsing Comment\n");
11767#endif
11768 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011769 if (ctxt->instate == XML_PARSER_EOF)
11770 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011771 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011772 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011773 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011774 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011775 (ctxt->input->cur[2] == 'D') &&
11776 (ctxt->input->cur[3] == 'O') &&
11777 (ctxt->input->cur[4] == 'C') &&
11778 (ctxt->input->cur[5] == 'T') &&
11779 (ctxt->input->cur[6] == 'Y') &&
11780 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011781 (ctxt->input->cur[8] == 'E')) {
11782 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011783 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11784 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011785 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011786 }
Owen Taylor3473f882001-02-23 17:55:21 +000011787#ifdef DEBUG_PUSH
11788 xmlGenericError(xmlGenericErrorContext,
11789 "PP: Parsing internal subset\n");
11790#endif
11791 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011792 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011793 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011794 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011795 if (ctxt->instate == XML_PARSER_EOF)
11796 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011797 if (RAW == '[') {
11798 ctxt->instate = XML_PARSER_DTD;
11799#ifdef DEBUG_PUSH
11800 xmlGenericError(xmlGenericErrorContext,
11801 "PP: entering DTD\n");
11802#endif
11803 } else {
11804 /*
11805 * Create and update the external subset.
11806 */
11807 ctxt->inSubset = 2;
11808 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11809 (ctxt->sax->externalSubset != NULL))
11810 ctxt->sax->externalSubset(ctxt->userData,
11811 ctxt->intSubName, ctxt->extSubSystem,
11812 ctxt->extSubURI);
11813 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011814 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011815 ctxt->instate = XML_PARSER_PROLOG;
11816#ifdef DEBUG_PUSH
11817 xmlGenericError(xmlGenericErrorContext,
11818 "PP: entering PROLOG\n");
11819#endif
11820 }
11821 } else if ((cur == '<') && (next == '!') &&
11822 (avail < 9)) {
11823 goto done;
11824 } else {
11825 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011826 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011827 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011828#ifdef DEBUG_PUSH
11829 xmlGenericError(xmlGenericErrorContext,
11830 "PP: entering START_TAG\n");
11831#endif
11832 }
11833 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011834 case XML_PARSER_PROLOG:
11835 SKIP_BLANKS;
11836 if (ctxt->input->buf == NULL)
11837 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11838 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011839 avail = xmlBufUse(ctxt->input->buf->buffer) -
11840 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011841 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011842 goto done;
11843 cur = ctxt->input->cur[0];
11844 next = ctxt->input->cur[1];
11845 if ((cur == '<') && (next == '?')) {
11846 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011847 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11848 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011849 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011850 }
Owen Taylor3473f882001-02-23 17:55:21 +000011851#ifdef DEBUG_PUSH
11852 xmlGenericError(xmlGenericErrorContext,
11853 "PP: Parsing PI\n");
11854#endif
11855 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011856 if (ctxt->instate == XML_PARSER_EOF)
11857 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011858 ctxt->instate = XML_PARSER_PROLOG;
11859 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011860 } else if ((cur == '<') && (next == '!') &&
11861 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11862 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011863 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11864 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011865 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011866 }
Owen Taylor3473f882001-02-23 17:55:21 +000011867#ifdef DEBUG_PUSH
11868 xmlGenericError(xmlGenericErrorContext,
11869 "PP: Parsing Comment\n");
11870#endif
11871 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011872 if (ctxt->instate == XML_PARSER_EOF)
11873 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011874 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011875 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011876 } else if ((cur == '<') && (next == '!') &&
11877 (avail < 4)) {
11878 goto done;
11879 } else {
11880 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011881 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011882 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011883 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011884#ifdef DEBUG_PUSH
11885 xmlGenericError(xmlGenericErrorContext,
11886 "PP: entering START_TAG\n");
11887#endif
11888 }
11889 break;
11890 case XML_PARSER_EPILOG:
11891 SKIP_BLANKS;
11892 if (ctxt->input->buf == NULL)
11893 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11894 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011895 avail = xmlBufUse(ctxt->input->buf->buffer) -
11896 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011897 if (avail < 2)
11898 goto done;
11899 cur = ctxt->input->cur[0];
11900 next = ctxt->input->cur[1];
11901 if ((cur == '<') && (next == '?')) {
11902 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011903 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11904 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011905 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011906 }
Owen Taylor3473f882001-02-23 17:55:21 +000011907#ifdef DEBUG_PUSH
11908 xmlGenericError(xmlGenericErrorContext,
11909 "PP: Parsing PI\n");
11910#endif
11911 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011912 if (ctxt->instate == XML_PARSER_EOF)
11913 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011914 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011915 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011916 } else if ((cur == '<') && (next == '!') &&
11917 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11918 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011919 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11920 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011921 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011922 }
Owen Taylor3473f882001-02-23 17:55:21 +000011923#ifdef DEBUG_PUSH
11924 xmlGenericError(xmlGenericErrorContext,
11925 "PP: Parsing Comment\n");
11926#endif
11927 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011928 if (ctxt->instate == XML_PARSER_EOF)
11929 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011930 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011931 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011932 } else if ((cur == '<') && (next == '!') &&
11933 (avail < 4)) {
11934 goto done;
11935 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011936 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011937 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011938#ifdef DEBUG_PUSH
11939 xmlGenericError(xmlGenericErrorContext,
11940 "PP: entering EOF\n");
11941#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011942 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011943 ctxt->sax->endDocument(ctxt->userData);
11944 goto done;
11945 }
11946 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011947 case XML_PARSER_DTD: {
11948 /*
11949 * Sorry but progressive parsing of the internal subset
11950 * is not expected to be supported. We first check that
11951 * the full content of the internal subset is available and
11952 * the parsing is launched only at that point.
11953 * Internal subset ends up with "']' S? '>'" in an unescaped
11954 * section and not in a ']]>' sequence which are conditional
11955 * sections (whoever argued to keep that crap in XML deserve
11956 * a place in hell !).
11957 */
11958 int base, i;
11959 xmlChar *buf;
11960 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011961 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011962
11963 base = ctxt->input->cur - ctxt->input->base;
11964 if (base < 0) return(0);
11965 if (ctxt->checkIndex > base)
11966 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011967 buf = xmlBufContent(ctxt->input->buf->buffer);
11968 use = xmlBufUse(ctxt->input->buf->buffer);
11969 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011970 if (quote != 0) {
11971 if (buf[base] == quote)
11972 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011973 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011974 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011975 if ((quote == 0) && (buf[base] == '<')) {
11976 int found = 0;
11977 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011978 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011979 (buf[base + 1] == '!') &&
11980 (buf[base + 2] == '-') &&
11981 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011982 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011983 if ((buf[base] == '-') &&
11984 (buf[base + 1] == '-') &&
11985 (buf[base + 2] == '>')) {
11986 found = 1;
11987 base += 2;
11988 break;
11989 }
11990 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011991 if (!found) {
11992#if 0
11993 fprintf(stderr, "unfinished comment\n");
11994#endif
11995 break; /* for */
11996 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011997 continue;
11998 }
11999 }
Owen Taylor3473f882001-02-23 17:55:21 +000012000 if (buf[base] == '"') {
12001 quote = '"';
12002 continue;
12003 }
12004 if (buf[base] == '\'') {
12005 quote = '\'';
12006 continue;
12007 }
12008 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012009#if 0
12010 fprintf(stderr, "%c%c%c%c: ", buf[base],
12011 buf[base + 1], buf[base + 2], buf[base + 3]);
12012#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012013 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000012014 break;
12015 if (buf[base + 1] == ']') {
12016 /* conditional crap, skip both ']' ! */
12017 base++;
12018 continue;
12019 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012020 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012021 if (buf[base + i] == '>') {
12022#if 0
12023 fprintf(stderr, "found\n");
12024#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012025 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012026 }
12027 if (!IS_BLANK_CH(buf[base + i])) {
12028#if 0
12029 fprintf(stderr, "not found\n");
12030#endif
12031 goto not_end_of_int_subset;
12032 }
Owen Taylor3473f882001-02-23 17:55:21 +000012033 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012034#if 0
12035 fprintf(stderr, "end of stream\n");
12036#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012037 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012038
Owen Taylor3473f882001-02-23 17:55:21 +000012039 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012040not_end_of_int_subset:
12041 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000012042 }
12043 /*
12044 * We didn't found the end of the Internal subset
12045 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012046 if (quote == 0)
12047 ctxt->checkIndex = base;
12048 else
12049 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012050#ifdef DEBUG_PUSH
12051 if (next == 0)
12052 xmlGenericError(xmlGenericErrorContext,
12053 "PP: lookup of int subset end filed\n");
12054#endif
12055 goto done;
12056
12057found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012058 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012059 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012060 if (ctxt->instate == XML_PARSER_EOF)
12061 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012062 ctxt->inSubset = 2;
12063 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12064 (ctxt->sax->externalSubset != NULL))
12065 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12066 ctxt->extSubSystem, ctxt->extSubURI);
12067 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000012068 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012069 if (ctxt->instate == XML_PARSER_EOF)
12070 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012071 ctxt->instate = XML_PARSER_PROLOG;
12072 ctxt->checkIndex = 0;
12073#ifdef DEBUG_PUSH
12074 xmlGenericError(xmlGenericErrorContext,
12075 "PP: entering PROLOG\n");
12076#endif
12077 break;
12078 }
12079 case XML_PARSER_COMMENT:
12080 xmlGenericError(xmlGenericErrorContext,
12081 "PP: internal error, state == COMMENT\n");
12082 ctxt->instate = XML_PARSER_CONTENT;
12083#ifdef DEBUG_PUSH
12084 xmlGenericError(xmlGenericErrorContext,
12085 "PP: entering CONTENT\n");
12086#endif
12087 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012088 case XML_PARSER_IGNORE:
12089 xmlGenericError(xmlGenericErrorContext,
12090 "PP: internal error, state == IGNORE");
12091 ctxt->instate = XML_PARSER_DTD;
12092#ifdef DEBUG_PUSH
12093 xmlGenericError(xmlGenericErrorContext,
12094 "PP: entering DTD\n");
12095#endif
12096 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012097 case XML_PARSER_PI:
12098 xmlGenericError(xmlGenericErrorContext,
12099 "PP: internal error, state == PI\n");
12100 ctxt->instate = XML_PARSER_CONTENT;
12101#ifdef DEBUG_PUSH
12102 xmlGenericError(xmlGenericErrorContext,
12103 "PP: entering CONTENT\n");
12104#endif
12105 break;
12106 case XML_PARSER_ENTITY_DECL:
12107 xmlGenericError(xmlGenericErrorContext,
12108 "PP: internal error, state == ENTITY_DECL\n");
12109 ctxt->instate = XML_PARSER_DTD;
12110#ifdef DEBUG_PUSH
12111 xmlGenericError(xmlGenericErrorContext,
12112 "PP: entering DTD\n");
12113#endif
12114 break;
12115 case XML_PARSER_ENTITY_VALUE:
12116 xmlGenericError(xmlGenericErrorContext,
12117 "PP: internal error, state == ENTITY_VALUE\n");
12118 ctxt->instate = XML_PARSER_CONTENT;
12119#ifdef DEBUG_PUSH
12120 xmlGenericError(xmlGenericErrorContext,
12121 "PP: entering DTD\n");
12122#endif
12123 break;
12124 case XML_PARSER_ATTRIBUTE_VALUE:
12125 xmlGenericError(xmlGenericErrorContext,
12126 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12127 ctxt->instate = XML_PARSER_START_TAG;
12128#ifdef DEBUG_PUSH
12129 xmlGenericError(xmlGenericErrorContext,
12130 "PP: entering START_TAG\n");
12131#endif
12132 break;
12133 case XML_PARSER_SYSTEM_LITERAL:
12134 xmlGenericError(xmlGenericErrorContext,
12135 "PP: internal error, state == SYSTEM_LITERAL\n");
12136 ctxt->instate = XML_PARSER_START_TAG;
12137#ifdef DEBUG_PUSH
12138 xmlGenericError(xmlGenericErrorContext,
12139 "PP: entering START_TAG\n");
12140#endif
12141 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012142 case XML_PARSER_PUBLIC_LITERAL:
12143 xmlGenericError(xmlGenericErrorContext,
12144 "PP: internal error, state == PUBLIC_LITERAL\n");
12145 ctxt->instate = XML_PARSER_START_TAG;
12146#ifdef DEBUG_PUSH
12147 xmlGenericError(xmlGenericErrorContext,
12148 "PP: entering START_TAG\n");
12149#endif
12150 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012151 }
12152 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012153done:
Owen Taylor3473f882001-02-23 17:55:21 +000012154#ifdef DEBUG_PUSH
12155 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12156#endif
12157 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012158encoding_error:
12159 {
12160 char buffer[150];
12161
12162 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12163 ctxt->input->cur[0], ctxt->input->cur[1],
12164 ctxt->input->cur[2], ctxt->input->cur[3]);
12165 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12166 "Input is not proper UTF-8, indicate encoding !\n%s",
12167 BAD_CAST buffer, NULL);
12168 }
12169 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012170}
12171
12172/**
Daniel Veillard65686452012-07-19 18:25:01 +080012173 * xmlParseCheckTransition:
12174 * @ctxt: an XML parser context
12175 * @chunk: a char array
12176 * @size: the size in byte of the chunk
12177 *
12178 * Check depending on the current parser state if the chunk given must be
12179 * processed immediately or one need more data to advance on parsing.
12180 *
12181 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12182 */
12183static int
12184xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12185 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12186 return(-1);
12187 if (ctxt->instate == XML_PARSER_START_TAG) {
12188 if (memchr(chunk, '>', size) != NULL)
12189 return(1);
12190 return(0);
12191 }
12192 if (ctxt->progressive == XML_PARSER_COMMENT) {
12193 if (memchr(chunk, '>', size) != NULL)
12194 return(1);
12195 return(0);
12196 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012197 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12198 if (memchr(chunk, '>', size) != NULL)
12199 return(1);
12200 return(0);
12201 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012202 if (ctxt->progressive == XML_PARSER_PI) {
12203 if (memchr(chunk, '>', size) != NULL)
12204 return(1);
12205 return(0);
12206 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012207 if (ctxt->instate == XML_PARSER_END_TAG) {
12208 if (memchr(chunk, '>', size) != NULL)
12209 return(1);
12210 return(0);
12211 }
12212 if ((ctxt->progressive == XML_PARSER_DTD) ||
12213 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012214 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012215 return(1);
12216 return(0);
12217 }
Daniel Veillard65686452012-07-19 18:25:01 +080012218 return(1);
12219}
12220
12221/**
Owen Taylor3473f882001-02-23 17:55:21 +000012222 * xmlParseChunk:
12223 * @ctxt: an XML parser context
12224 * @chunk: an char array
12225 * @size: the size in byte of the chunk
12226 * @terminate: last chunk indicator
12227 *
12228 * Parse a Chunk of memory
12229 *
12230 * Returns zero if no error, the xmlParserErrors otherwise.
12231 */
12232int
12233xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12234 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012235 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012236 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012237 size_t old_avail = 0;
12238 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012239
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012240 if (ctxt == NULL)
12241 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012242 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012243 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012244 if (ctxt->instate == XML_PARSER_EOF)
12245 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012246 if (ctxt->instate == XML_PARSER_START)
12247 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012248 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12249 (chunk[size - 1] == '\r')) {
12250 end_in_lf = 1;
12251 size--;
12252 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012253
12254xmldecl_done:
12255
Owen Taylor3473f882001-02-23 17:55:21 +000012256 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12257 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012258 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12259 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012260 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012261
Daniel Veillard65686452012-07-19 18:25:01 +080012262 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012263 /*
12264 * Specific handling if we autodetected an encoding, we should not
12265 * push more than the first line ... which depend on the encoding
12266 * And only push the rest once the final encoding was detected
12267 */
12268 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12269 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012270 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012271
12272 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12273 BAD_CAST "UTF-16")) ||
12274 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12275 BAD_CAST "UTF16")))
12276 len = 90;
12277 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12278 BAD_CAST "UCS-4")) ||
12279 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12280 BAD_CAST "UCS4")))
12281 len = 180;
12282
12283 if (ctxt->input->buf->rawconsumed < len)
12284 len -= ctxt->input->buf->rawconsumed;
12285
Raul Hudeaba9716a2010-03-15 10:13:29 +010012286 /*
12287 * Change size for reading the initial declaration only
12288 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12289 * will blindly copy extra bytes from memory.
12290 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012291 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012292 remain = size - len;
12293 size = len;
12294 } else {
12295 remain = 0;
12296 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012297 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012298 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012299 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
William M. Bracka3215c72004-07-31 16:24:01 +000012300 if (res < 0) {
12301 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012302 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012303 return (XML_PARSER_EOF);
12304 }
Owen Taylor3473f882001-02-23 17:55:21 +000012305#ifdef DEBUG_PUSH
12306 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12307#endif
12308
Owen Taylor3473f882001-02-23 17:55:21 +000012309 } else if (ctxt->instate != XML_PARSER_EOF) {
12310 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12311 xmlParserInputBufferPtr in = ctxt->input->buf;
12312 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12313 (in->raw != NULL)) {
12314 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012315 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12316 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012317
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012318 nbchars = xmlCharEncInput(in, terminate);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012319 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012320 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012321 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012322 xmlGenericError(xmlGenericErrorContext,
12323 "xmlParseChunk: encoder error\n");
Nick Wellnhoferab362ab2018-01-22 15:40:05 +010012324 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012325 return(XML_ERR_INVALID_ENCODING);
12326 }
12327 }
12328 }
12329 }
Daniel Veillard65686452012-07-19 18:25:01 +080012330 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012331 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012332 } else {
12333 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12334 avail = xmlBufUse(ctxt->input->buf->buffer);
12335 /*
12336 * Depending on the current state it may not be such
12337 * a good idea to try parsing if there is nothing in the chunk
12338 * which would be worth doing a parser state transition and we
12339 * need to wait for more data
12340 */
12341 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12342 (old_avail == 0) || (avail == 0) ||
12343 (xmlParseCheckTransition(ctxt,
12344 (const char *)&ctxt->input->base[old_avail],
12345 avail - old_avail)))
12346 xmlParseTryOrFinish(ctxt, terminate);
12347 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012348 if (ctxt->instate == XML_PARSER_EOF)
12349 return(ctxt->errNo);
12350
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012351 if ((ctxt->input != NULL) &&
12352 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12353 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12354 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12355 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012356 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012357 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012358 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12359 return(ctxt->errNo);
12360
12361 if (remain != 0) {
12362 chunk += size;
12363 size = remain;
12364 remain = 0;
12365 goto xmldecl_done;
12366 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012367 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12368 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012369 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12370 ctxt->input);
12371 size_t current = ctxt->input->cur - ctxt->input->base;
12372
Daniel Veillarda617e242006-01-09 14:38:44 +000012373 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012374
12375 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12376 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012377 }
Owen Taylor3473f882001-02-23 17:55:21 +000012378 if (terminate) {
12379 /*
12380 * Check for termination
12381 */
Daniel Veillard65686452012-07-19 18:25:01 +080012382 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012383
12384 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012385 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012386 cur_avail = ctxt->input->length -
12387 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012388 else
Daniel Veillard65686452012-07-19 18:25:01 +080012389 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12390 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012391 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012392
Owen Taylor3473f882001-02-23 17:55:21 +000012393 if ((ctxt->instate != XML_PARSER_EOF) &&
12394 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012395 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012396 }
Daniel Veillard65686452012-07-19 18:25:01 +080012397 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012398 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012399 }
Owen Taylor3473f882001-02-23 17:55:21 +000012400 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012401 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012402 ctxt->sax->endDocument(ctxt->userData);
12403 }
12404 ctxt->instate = XML_PARSER_EOF;
12405 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012406 if (ctxt->wellFormed == 0)
12407 return((xmlParserErrors) ctxt->errNo);
12408 else
12409 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012410}
12411
12412/************************************************************************
12413 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012414 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012415 * *
12416 ************************************************************************/
12417
12418/**
Owen Taylor3473f882001-02-23 17:55:21 +000012419 * xmlCreatePushParserCtxt:
12420 * @sax: a SAX handler
12421 * @user_data: The user data returned on SAX callbacks
12422 * @chunk: a pointer to an array of chars
12423 * @size: number of chars in the array
12424 * @filename: an optional file name or URI
12425 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012426 * Create a parser context for using the XML parser in push mode.
12427 * If @buffer and @size are non-NULL, the data is used to detect
12428 * the encoding. The remaining characters will be parsed so they
12429 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012430 * To allow content encoding detection, @size should be >= 4
12431 * The value of @filename is used for fetching external entities
12432 * and error/warning reports.
12433 *
12434 * Returns the new parser context or NULL
12435 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012436
Owen Taylor3473f882001-02-23 17:55:21 +000012437xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012438xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012439 const char *chunk, int size, const char *filename) {
12440 xmlParserCtxtPtr ctxt;
12441 xmlParserInputPtr inputStream;
12442 xmlParserInputBufferPtr buf;
12443 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12444
12445 /*
12446 * plug some encoding conversion routines
12447 */
12448 if ((chunk != NULL) && (size >= 4))
12449 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12450
12451 buf = xmlAllocParserInputBuffer(enc);
12452 if (buf == NULL) return(NULL);
12453
12454 ctxt = xmlNewParserCtxt();
12455 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012456 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012457 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012458 return(NULL);
12459 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012460 ctxt->dictNames = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012461 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012462#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012463 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012464#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012465 xmlFree(ctxt->sax);
12466 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12467 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012468 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012469 xmlFreeParserInputBuffer(buf);
12470 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012471 return(NULL);
12472 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012473 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12474 if (sax->initialized == XML_SAX2_MAGIC)
12475 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12476 else
12477 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012478 if (user_data != NULL)
12479 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012480 }
Owen Taylor3473f882001-02-23 17:55:21 +000012481 if (filename == NULL) {
12482 ctxt->directory = NULL;
12483 } else {
12484 ctxt->directory = xmlParserGetDirectory(filename);
12485 }
12486
12487 inputStream = xmlNewInputStream(ctxt);
12488 if (inputStream == NULL) {
12489 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012490 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012491 return(NULL);
12492 }
12493
12494 if (filename == NULL)
12495 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012496 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012497 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012498 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012499 if (inputStream->filename == NULL) {
12500 xmlFreeParserCtxt(ctxt);
12501 xmlFreeParserInputBuffer(buf);
12502 return(NULL);
12503 }
12504 }
Owen Taylor3473f882001-02-23 17:55:21 +000012505 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012506 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012507 inputPush(ctxt, inputStream);
12508
William M. Brack3a1cd212005-02-11 14:35:54 +000012509 /*
12510 * If the caller didn't provide an initial 'chunk' for determining
12511 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12512 * that it can be automatically determined later
12513 */
12514 if ((size == 0) || (chunk == NULL)) {
12515 ctxt->charset = XML_CHAR_ENCODING_NONE;
12516 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012517 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12518 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012519
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012520 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012521
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012522 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012523#ifdef DEBUG_PUSH
12524 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12525#endif
12526 }
12527
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012528 if (enc != XML_CHAR_ENCODING_NONE) {
12529 xmlSwitchEncoding(ctxt, enc);
12530 }
12531
Owen Taylor3473f882001-02-23 17:55:21 +000012532 return(ctxt);
12533}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012534#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012535
12536/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012537 * xmlHaltParser:
12538 * @ctxt: an XML parser context
12539 *
12540 * Blocks further parser processing don't override error
12541 * for internal use
12542 */
12543static void
12544xmlHaltParser(xmlParserCtxtPtr ctxt) {
12545 if (ctxt == NULL)
12546 return;
12547 ctxt->instate = XML_PARSER_EOF;
12548 ctxt->disableSAX = 1;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012549 while (ctxt->inputNr > 1)
12550 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012551 if (ctxt->input != NULL) {
12552 /*
12553 * in case there was a specific allocation deallocate before
12554 * overriding base
12555 */
12556 if (ctxt->input->free != NULL) {
12557 ctxt->input->free((xmlChar *) ctxt->input->base);
12558 ctxt->input->free = NULL;
12559 }
Elliott Hughes7fbecab2019-01-10 16:42:03 -080012560 if (ctxt->input->buf != NULL) {
12561 xmlFreeParserInputBuffer(ctxt->input->buf);
12562 ctxt->input->buf = NULL;
12563 }
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012564 ctxt->input->cur = BAD_CAST"";
Elliott Hughes7fbecab2019-01-10 16:42:03 -080012565 ctxt->input->length = 0;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012566 ctxt->input->base = ctxt->input->cur;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012567 ctxt->input->end = ctxt->input->cur;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012568 }
12569}
12570
12571/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012572 * xmlStopParser:
12573 * @ctxt: an XML parser context
12574 *
12575 * Blocks further parser processing
12576 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012577void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012578xmlStopParser(xmlParserCtxtPtr ctxt) {
12579 if (ctxt == NULL)
12580 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012581 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012582 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012583}
12584
12585/**
Owen Taylor3473f882001-02-23 17:55:21 +000012586 * xmlCreateIOParserCtxt:
12587 * @sax: a SAX handler
12588 * @user_data: The user data returned on SAX callbacks
12589 * @ioread: an I/O read function
12590 * @ioclose: an I/O close function
12591 * @ioctx: an I/O handler
12592 * @enc: the charset encoding if known
12593 *
12594 * Create a parser context for using the XML parser with an existing
12595 * I/O stream
12596 *
12597 * Returns the new parser context or NULL
12598 */
12599xmlParserCtxtPtr
12600xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12601 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12602 void *ioctx, xmlCharEncoding enc) {
12603 xmlParserCtxtPtr ctxt;
12604 xmlParserInputPtr inputStream;
12605 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012606
Daniel Veillard42595322004-11-08 10:52:06 +000012607 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012608
12609 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012610 if (buf == NULL) {
12611 if (ioclose != NULL)
12612 ioclose(ioctx);
12613 return (NULL);
12614 }
Owen Taylor3473f882001-02-23 17:55:21 +000012615
12616 ctxt = xmlNewParserCtxt();
12617 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012618 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012619 return(NULL);
12620 }
12621 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012622#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012623 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012624#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012625 xmlFree(ctxt->sax);
12626 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12627 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012628 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012629 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012630 return(NULL);
12631 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012632 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12633 if (sax->initialized == XML_SAX2_MAGIC)
12634 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12635 else
12636 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012637 if (user_data != NULL)
12638 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012639 }
Owen Taylor3473f882001-02-23 17:55:21 +000012640
12641 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12642 if (inputStream == NULL) {
12643 xmlFreeParserCtxt(ctxt);
12644 return(NULL);
12645 }
12646 inputPush(ctxt, inputStream);
12647
12648 return(ctxt);
12649}
12650
Daniel Veillard4432df22003-09-28 18:58:27 +000012651#ifdef LIBXML_VALID_ENABLED
/************************************************************************
 *									*
 *		Front ends when parsing a DTD				*
 *									*
 ************************************************************************/
12657
12658/**
12659 * xmlIOParseDTD:
12660 * @sax: the SAX handler block or NULL
12661 * @input: an Input Buffer
12662 * @enc: the charset encoding if known
12663 *
12664 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012665 *
Owen Taylor3473f882001-02-23 17:55:21 +000012666 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012667 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012668 */
12669
12670xmlDtdPtr
12671xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12672 xmlCharEncoding enc) {
12673 xmlDtdPtr ret = NULL;
12674 xmlParserCtxtPtr ctxt;
12675 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012676 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012677
12678 if (input == NULL)
12679 return(NULL);
12680
12681 ctxt = xmlNewParserCtxt();
12682 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012683 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012684 return(NULL);
12685 }
12686
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012687 /* We are loading a DTD */
12688 ctxt->options |= XML_PARSE_DTDLOAD;
12689
Owen Taylor3473f882001-02-23 17:55:21 +000012690 /*
12691 * Set-up the SAX context
12692 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012693 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012694 if (ctxt->sax != NULL)
12695 xmlFree(ctxt->sax);
12696 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012697 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012698 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012699 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012700
12701 /*
12702 * generate a parser input from the I/O handler
12703 */
12704
Daniel Veillard43caefb2003-12-07 19:32:22 +000012705 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012706 if (pinput == NULL) {
12707 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012708 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012709 xmlFreeParserCtxt(ctxt);
12710 return(NULL);
12711 }
12712
12713 /*
12714 * plug some encoding conversion routines here.
12715 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012716 if (xmlPushInput(ctxt, pinput) < 0) {
12717 if (sax != NULL) ctxt->sax = NULL;
12718 xmlFreeParserCtxt(ctxt);
12719 return(NULL);
12720 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012721 if (enc != XML_CHAR_ENCODING_NONE) {
12722 xmlSwitchEncoding(ctxt, enc);
12723 }
Owen Taylor3473f882001-02-23 17:55:21 +000012724
12725 pinput->filename = NULL;
12726 pinput->line = 1;
12727 pinput->col = 1;
12728 pinput->base = ctxt->input->cur;
12729 pinput->cur = ctxt->input->cur;
12730 pinput->free = NULL;
12731
12732 /*
12733 * let's parse that entity knowing it's an external subset.
12734 */
12735 ctxt->inSubset = 2;
12736 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012737 if (ctxt->myDoc == NULL) {
12738 xmlErrMemory(ctxt, "New Doc failed");
12739 return(NULL);
12740 }
12741 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012742 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12743 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012744
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012745 if ((enc == XML_CHAR_ENCODING_NONE) &&
12746 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012747 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012748 * Get the 4 first bytes and decode the charset
12749 * if enc != XML_CHAR_ENCODING_NONE
12750 * plug some encoding conversion routines.
12751 */
12752 start[0] = RAW;
12753 start[1] = NXT(1);
12754 start[2] = NXT(2);
12755 start[3] = NXT(3);
12756 enc = xmlDetectCharEncoding(start, 4);
12757 if (enc != XML_CHAR_ENCODING_NONE) {
12758 xmlSwitchEncoding(ctxt, enc);
12759 }
12760 }
12761
Owen Taylor3473f882001-02-23 17:55:21 +000012762 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12763
12764 if (ctxt->myDoc != NULL) {
12765 if (ctxt->wellFormed) {
12766 ret = ctxt->myDoc->extSubset;
12767 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012768 if (ret != NULL) {
12769 xmlNodePtr tmp;
12770
12771 ret->doc = NULL;
12772 tmp = ret->children;
12773 while (tmp != NULL) {
12774 tmp->doc = NULL;
12775 tmp = tmp->next;
12776 }
12777 }
Owen Taylor3473f882001-02-23 17:55:21 +000012778 } else {
12779 ret = NULL;
12780 }
12781 xmlFreeDoc(ctxt->myDoc);
12782 ctxt->myDoc = NULL;
12783 }
12784 if (sax != NULL) ctxt->sax = NULL;
12785 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012786
Owen Taylor3473f882001-02-23 17:55:21 +000012787 return(ret);
12788}
12789
12790/**
12791 * xmlSAXParseDTD:
12792 * @sax: the SAX handler block
12793 * @ExternalID: a NAME* containing the External ID of the DTD
12794 * @SystemID: a NAME* containing the URL to the DTD
12795 *
12796 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012797 *
Owen Taylor3473f882001-02-23 17:55:21 +000012798 * Returns the resulting xmlDtdPtr or NULL in case of error.
12799 */
12800
12801xmlDtdPtr
12802xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12803 const xmlChar *SystemID) {
12804 xmlDtdPtr ret = NULL;
12805 xmlParserCtxtPtr ctxt;
12806 xmlParserInputPtr input = NULL;
12807 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012808 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012809
12810 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12811
12812 ctxt = xmlNewParserCtxt();
12813 if (ctxt == NULL) {
12814 return(NULL);
12815 }
12816
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012817 /* We are loading a DTD */
12818 ctxt->options |= XML_PARSE_DTDLOAD;
12819
Owen Taylor3473f882001-02-23 17:55:21 +000012820 /*
12821 * Set-up the SAX context
12822 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012823 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012824 if (ctxt->sax != NULL)
12825 xmlFree(ctxt->sax);
12826 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012827 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012828 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012829
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012830 /*
12831 * Canonicalise the system ID
12832 */
12833 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012834 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012835 xmlFreeParserCtxt(ctxt);
12836 return(NULL);
12837 }
Owen Taylor3473f882001-02-23 17:55:21 +000012838
12839 /*
12840 * Ask the Entity resolver to load the damn thing
12841 */
12842
12843 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012844 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12845 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012846 if (input == NULL) {
12847 if (sax != NULL) ctxt->sax = NULL;
12848 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012849 if (systemIdCanonic != NULL)
12850 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012851 return(NULL);
12852 }
12853
12854 /*
12855 * plug some encoding conversion routines here.
12856 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012857 if (xmlPushInput(ctxt, input) < 0) {
12858 if (sax != NULL) ctxt->sax = NULL;
12859 xmlFreeParserCtxt(ctxt);
12860 if (systemIdCanonic != NULL)
12861 xmlFree(systemIdCanonic);
12862 return(NULL);
12863 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012864 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12865 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12866 xmlSwitchEncoding(ctxt, enc);
12867 }
Owen Taylor3473f882001-02-23 17:55:21 +000012868
12869 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012870 input->filename = (char *) systemIdCanonic;
12871 else
12872 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012873 input->line = 1;
12874 input->col = 1;
12875 input->base = ctxt->input->cur;
12876 input->cur = ctxt->input->cur;
12877 input->free = NULL;
12878
12879 /*
12880 * let's parse that entity knowing it's an external subset.
12881 */
12882 ctxt->inSubset = 2;
12883 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012884 if (ctxt->myDoc == NULL) {
12885 xmlErrMemory(ctxt, "New Doc failed");
12886 if (sax != NULL) ctxt->sax = NULL;
12887 xmlFreeParserCtxt(ctxt);
12888 return(NULL);
12889 }
12890 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012891 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12892 ExternalID, SystemID);
12893 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12894
12895 if (ctxt->myDoc != NULL) {
12896 if (ctxt->wellFormed) {
12897 ret = ctxt->myDoc->extSubset;
12898 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012899 if (ret != NULL) {
12900 xmlNodePtr tmp;
12901
12902 ret->doc = NULL;
12903 tmp = ret->children;
12904 while (tmp != NULL) {
12905 tmp->doc = NULL;
12906 tmp = tmp->next;
12907 }
12908 }
Owen Taylor3473f882001-02-23 17:55:21 +000012909 } else {
12910 ret = NULL;
12911 }
12912 xmlFreeDoc(ctxt->myDoc);
12913 ctxt->myDoc = NULL;
12914 }
12915 if (sax != NULL) ctxt->sax = NULL;
12916 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012917
Owen Taylor3473f882001-02-23 17:55:21 +000012918 return(ret);
12919}
12920
Daniel Veillard4432df22003-09-28 18:58:27 +000012921
Owen Taylor3473f882001-02-23 17:55:21 +000012922/**
12923 * xmlParseDTD:
12924 * @ExternalID: a NAME* containing the External ID of the DTD
12925 * @SystemID: a NAME* containing the URL to the DTD
12926 *
12927 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012928 *
Owen Taylor3473f882001-02-23 17:55:21 +000012929 * Returns the resulting xmlDtdPtr or NULL in case of error.
12930 */
12931
12932xmlDtdPtr
12933xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12934 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12935}
Daniel Veillard4432df22003-09-28 18:58:27 +000012936#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012937
/************************************************************************
 *									*
 *		Front ends when parsing an Entity			*
 *									*
 ************************************************************************/
12943
12944/**
Owen Taylor3473f882001-02-23 17:55:21 +000012945 * xmlParseCtxtExternalEntity:
12946 * @ctx: the existing parsing context
12947 * @URL: the URL for the entity to load
12948 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012949 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012950 *
12951 * Parse an external general entity within an existing parsing context
12952 * An external general parsed entity is well-formed if it matches the
12953 * production labeled extParsedEnt.
12954 *
12955 * [78] extParsedEnt ::= TextDecl? content
12956 *
12957 * Returns 0 if the entity is well formed, -1 in case of args problem and
12958 * the parser error code otherwise
12959 */
12960
12961int
12962xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012963 const xmlChar *ID, xmlNodePtr *lst) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012964 void *userData;
Owen Taylor3473f882001-02-23 17:55:21 +000012965
Daniel Veillardce682bc2004-11-05 17:22:25 +000012966 if (ctx == NULL) return(-1);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012967 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012968 * If the user provided their own SAX callbacks, then reuse the
12969 * userData callback field, otherwise the expected setup in a
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012970 * DOM builder is to have userData == ctxt
12971 */
12972 if (ctx->userData == ctx)
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012973 userData = NULL;
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012974 else
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012975 userData = ctx->userData;
12976 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12977 userData, ctx->depth + 1,
12978 URL, ID, lst);
Owen Taylor3473f882001-02-23 17:55:21 +000012979}
12980
12981/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012982 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012983 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012984 * @oldctxt: the previous parser context if available
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012985 * @sax: the SAX handler block (possibly NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000012986 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12987 * @depth: Used for loop detection, use 0
12988 * @URL: the URL for the entity to load
12989 * @ID: the System ID for the entity to load
12990 * @list: the return value for the set of parsed nodes
12991 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012992 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012993 *
12994 * Returns 0 if the entity is well formed, -1 in case of args problem and
12995 * the parser error code otherwise
12996 */
12997
Daniel Veillard7d515752003-09-26 19:12:37 +000012998static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012999xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13000 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013001 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013002 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013003 xmlParserCtxtPtr ctxt;
13004 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013005 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013006 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013007 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013008 xmlChar start[4];
13009 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013010
Daniel Veillard0161e632008-08-28 15:36:32 +000013011 if (((depth > 40) &&
13012 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13013 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013014 return(XML_ERR_ENTITY_LOOP);
13015 }
13016
Owen Taylor3473f882001-02-23 17:55:21 +000013017 if (list != NULL)
13018 *list = NULL;
13019 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013020 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013021 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013022 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013023
13024
Rob Richards9c0aa472009-03-26 18:10:19 +000013025 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013026 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013027 ctxt->userData = ctxt;
13028 if (sax != NULL) {
13029 oldsax = ctxt->sax;
13030 ctxt->sax = sax;
13031 if (user_data != NULL)
13032 ctxt->userData = user_data;
13033 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013034 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013035 newDoc = xmlNewDoc(BAD_CAST "1.0");
13036 if (newDoc == NULL) {
13037 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013038 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013039 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013040 newDoc->properties = XML_DOC_INTERNAL;
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013041 if (doc) {
13042 newDoc->intSubset = doc->intSubset;
13043 newDoc->extSubset = doc->extSubset;
13044 if (doc->dict) {
13045 newDoc->dict = doc->dict;
13046 xmlDictReference(newDoc->dict);
13047 }
13048 if (doc->URL != NULL) {
13049 newDoc->URL = xmlStrdup(doc->URL);
13050 }
Owen Taylor3473f882001-02-23 17:55:21 +000013051 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013052 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13053 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013054 if (sax != NULL)
13055 ctxt->sax = oldsax;
13056 xmlFreeParserCtxt(ctxt);
13057 newDoc->intSubset = NULL;
13058 newDoc->extSubset = NULL;
13059 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013060 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013061 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013062 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013063 nodePush(ctxt, newDoc->children);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013064 if (doc == NULL) {
13065 ctxt->myDoc = newDoc;
13066 } else {
13067 ctxt->myDoc = doc;
13068 newRoot->doc = doc;
13069 }
Owen Taylor3473f882001-02-23 17:55:21 +000013070
Daniel Veillard0161e632008-08-28 15:36:32 +000013071 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013072 * Get the 4 first bytes and decode the charset
13073 * if enc != XML_CHAR_ENCODING_NONE
13074 * plug some encoding conversion routines.
13075 */
13076 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013077 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13078 start[0] = RAW;
13079 start[1] = NXT(1);
13080 start[2] = NXT(2);
13081 start[3] = NXT(3);
13082 enc = xmlDetectCharEncoding(start, 4);
13083 if (enc != XML_CHAR_ENCODING_NONE) {
13084 xmlSwitchEncoding(ctxt, enc);
13085 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013086 }
13087
Owen Taylor3473f882001-02-23 17:55:21 +000013088 /*
13089 * Parse a possible text declaration first
13090 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013091 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013092 xmlParseTextDecl(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013093 /*
13094 * An XML-1.0 document can't reference an entity not XML-1.0
13095 */
13096 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13097 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13098 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13099 "Version mismatch between document and entity\n");
13100 }
Owen Taylor3473f882001-02-23 17:55:21 +000013101 }
13102
Owen Taylor3473f882001-02-23 17:55:21 +000013103 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013104 ctxt->depth = depth;
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013105 if (oldctxt != NULL) {
13106 ctxt->_private = oldctxt->_private;
13107 ctxt->loadsubset = oldctxt->loadsubset;
13108 ctxt->validate = oldctxt->validate;
13109 ctxt->valid = oldctxt->valid;
13110 ctxt->replaceEntities = oldctxt->replaceEntities;
13111 if (oldctxt->validate) {
13112 ctxt->vctxt.error = oldctxt->vctxt.error;
13113 ctxt->vctxt.warning = oldctxt->vctxt.warning;
13114 ctxt->vctxt.userData = oldctxt->vctxt.userData;
13115 }
13116 ctxt->external = oldctxt->external;
13117 if (ctxt->dict) xmlDictFree(ctxt->dict);
13118 ctxt->dict = oldctxt->dict;
13119 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13120 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13121 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13122 ctxt->dictNames = oldctxt->dictNames;
13123 ctxt->attsDefault = oldctxt->attsDefault;
13124 ctxt->attsSpecial = oldctxt->attsSpecial;
13125 ctxt->linenumbers = oldctxt->linenumbers;
13126 ctxt->record_info = oldctxt->record_info;
13127 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13128 ctxt->node_seq.length = oldctxt->node_seq.length;
13129 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13130 } else {
13131 /*
13132 * Doing validity checking on chunk without context
13133 * doesn't make sense
13134 */
13135 ctxt->_private = NULL;
13136 ctxt->validate = 0;
13137 ctxt->external = 2;
13138 ctxt->loadsubset = 0;
13139 }
Owen Taylor3473f882001-02-23 17:55:21 +000013140
13141 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013142
Daniel Veillard561b7f82002-03-20 21:55:57 +000013143 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013144 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013145 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013146 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013147 }
13148 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013149 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013150 }
13151
13152 if (!ctxt->wellFormed) {
13153 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013154 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013155 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013156 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013157 } else {
13158 if (list != NULL) {
13159 xmlNodePtr cur;
13160
13161 /*
13162 * Return the newly created nodeset after unlinking it from
13163 * they pseudo parent.
13164 */
13165 cur = newDoc->children->children;
13166 *list = cur;
13167 while (cur != NULL) {
13168 cur->parent = NULL;
13169 cur = cur->next;
13170 }
13171 newDoc->children->children = NULL;
13172 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013173 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013174 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013175
13176 /*
13177 * Record in the parent context the number of entities replacement
13178 * done when parsing that reference.
13179 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013180 if (oldctxt != NULL)
13181 oldctxt->nbentities += ctxt->nbentities;
13182
Daniel Veillard0161e632008-08-28 15:36:32 +000013183 /*
13184 * Also record the size of the entity parsed
13185 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013186 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013187 oldctxt->sizeentities += ctxt->input->consumed;
13188 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13189 }
13190 /*
13191 * And record the last error if any
13192 */
Nick Wellnhofer3eef3f32017-06-20 16:13:57 +020013193 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
Daniel Veillard0161e632008-08-28 15:36:32 +000013194 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13195
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013196 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013197 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013198 if (oldctxt != NULL) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013199 ctxt->dict = NULL;
13200 ctxt->attsDefault = NULL;
13201 ctxt->attsSpecial = NULL;
13202 oldctxt->validate = ctxt->validate;
13203 oldctxt->valid = ctxt->valid;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013204 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13205 oldctxt->node_seq.length = ctxt->node_seq.length;
13206 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13207 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013208 ctxt->node_seq.maximum = 0;
13209 ctxt->node_seq.length = 0;
13210 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013211 xmlFreeParserCtxt(ctxt);
13212 newDoc->intSubset = NULL;
13213 newDoc->extSubset = NULL;
13214 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013215
Owen Taylor3473f882001-02-23 17:55:21 +000013216 return(ret);
13217}
13218
Daniel Veillard81273902003-09-30 00:43:48 +000013219#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013220/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013221 * xmlParseExternalEntity:
13222 * @doc: the document the chunk pertains to
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013223 * @sax: the SAX handler block (possibly NULL)
Daniel Veillard257d9102001-05-08 10:41:44 +000013224 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13225 * @depth: Used for loop detection, use 0
13226 * @URL: the URL for the entity to load
13227 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013228 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013229 *
13230 * Parse an external general entity
13231 * An external general parsed entity is well-formed if it matches the
13232 * production labeled extParsedEnt.
13233 *
13234 * [78] extParsedEnt ::= TextDecl? content
13235 *
13236 * Returns 0 if the entity is well formed, -1 in case of args problem and
13237 * the parser error code otherwise
13238 */
13239
13240int
13241xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013242 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013243 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013244 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013245}
13246
13247/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013248 * xmlParseBalancedChunkMemory:
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013249 * @doc: the document the chunk pertains to (must not be NULL)
13250 * @sax: the SAX handler block (possibly NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013251 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13252 * @depth: Used for loop detection, use 0
13253 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013254 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013255 *
13256 * Parse a well-balanced chunk of an XML document
13257 * called by the parser
13258 * The allowed sequence for the Well Balanced Chunk is the one defined by
13259 * the content production in the XML grammar:
13260 *
13261 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13262 *
13263 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13264 * the parser error code otherwise
13265 */
13266
13267int
13268xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013269 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013270 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13271 depth, string, lst, 0 );
13272}
Daniel Veillard81273902003-09-30 00:43:48 +000013273#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013274
13275/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013276 * xmlParseBalancedChunkMemoryInternal:
13277 * @oldctxt: the existing parsing context
13278 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13279 * @user_data: the user data field for the parser context
13280 * @lst: the return value for the set of parsed nodes
13281 *
13282 *
13283 * Parse a well-balanced chunk of an XML document
13284 * called by the parser
13285 * The allowed sequence for the Well Balanced Chunk is the one defined by
13286 * the content production in the XML grammar:
13287 *
13288 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13289 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013290 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13291 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013292 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013293 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013294 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013295 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013296static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013297xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13298 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13299 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013300 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013301 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013302 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013303 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013304 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013305 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013306 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013307#ifdef SAX2
13308 int i;
13309#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013310
Daniel Veillard0161e632008-08-28 15:36:32 +000013311 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13312 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013313 return(XML_ERR_ENTITY_LOOP);
13314 }
13315
13316
13317 if (lst != NULL)
13318 *lst = NULL;
13319 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013320 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013321
13322 size = xmlStrlen(string);
13323
13324 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013325 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013326 if (user_data != NULL)
13327 ctxt->userData = user_data;
13328 else
13329 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013330 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13331 ctxt->dict = oldctxt->dict;
Daniel Veillardad88b542017-12-08 09:42:31 +010013332 ctxt->input_id = oldctxt->input_id + 1;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013333 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13334 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13335 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013336
Daniel Veillard74eaec12009-08-26 15:57:20 +020013337#ifdef SAX2
13338 /* propagate namespaces down the entity */
13339 for (i = 0;i < oldctxt->nsNr;i += 2) {
13340 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13341 }
13342#endif
13343
Daniel Veillard328f48c2002-11-15 15:24:34 +000013344 oldsax = ctxt->sax;
13345 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013346 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013347 ctxt->replaceEntities = oldctxt->replaceEntities;
13348 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013349
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013350 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013351 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013352 newDoc = xmlNewDoc(BAD_CAST "1.0");
13353 if (newDoc == NULL) {
13354 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013355 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013356 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013357 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013358 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013359 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013360 newDoc->dict = ctxt->dict;
13361 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013362 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013363 } else {
13364 ctxt->myDoc = oldctxt->myDoc;
13365 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013366 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013367 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013368 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13369 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013370 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013371 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013372 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013373 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013374 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013375 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013376 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013377 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013378 ctxt->myDoc->children = NULL;
13379 ctxt->myDoc->last = NULL;
13380 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013381 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013382 ctxt->instate = XML_PARSER_CONTENT;
13383 ctxt->depth = oldctxt->depth + 1;
13384
Daniel Veillard328f48c2002-11-15 15:24:34 +000013385 ctxt->validate = 0;
13386 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013387 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13388 /*
13389 * ID/IDREF registration will be done in xmlValidateElement below
13390 */
13391 ctxt->loadsubset |= XML_SKIP_IDS;
13392 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013393 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013394 ctxt->attsDefault = oldctxt->attsDefault;
13395 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013396
Daniel Veillard68e9e742002-11-16 15:35:11 +000013397 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013398 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013399 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013400 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013401 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013402 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013403 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013404 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013405 }
13406
13407 if (!ctxt->wellFormed) {
13408 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013409 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013410 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013411 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013412 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013413 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013414 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013415
William M. Brack7b9154b2003-09-27 19:23:50 +000013416 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013417 xmlNodePtr cur;
13418
13419 /*
13420 * Return the newly created nodeset after unlinking it from
13421 * they pseudo parent.
13422 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013423 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013424 *lst = cur;
13425 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013426#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013427 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13428 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13429 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013430 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13431 oldctxt->myDoc, cur);
13432 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013433#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013434 cur->parent = NULL;
13435 cur = cur->next;
13436 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013437 ctxt->myDoc->children->children = NULL;
13438 }
13439 if (ctxt->myDoc != NULL) {
13440 xmlFreeNode(ctxt->myDoc->children);
13441 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013442 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013443 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013444
13445 /*
13446 * Record in the parent context the number of entities replacement
13447 * done when parsing that reference.
13448 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013449 if (oldctxt != NULL)
13450 oldctxt->nbentities += ctxt->nbentities;
13451
Daniel Veillard0161e632008-08-28 15:36:32 +000013452 /*
13453 * Also record the last error if any
13454 */
13455 if (ctxt->lastError.code != XML_ERR_OK)
13456 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13457
Daniel Veillard328f48c2002-11-15 15:24:34 +000013458 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013459 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013460 ctxt->attsDefault = NULL;
13461 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013462 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013463 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013464 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013465 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013466
Daniel Veillard328f48c2002-11-15 15:24:34 +000013467 return(ret);
13468}
13469
Daniel Veillard29b17482004-08-16 00:39:03 +000013470/**
13471 * xmlParseInNodeContext:
13472 * @node: the context node
13473 * @data: the input string
13474 * @datalen: the input string length in bytes
13475 * @options: a combination of xmlParserOption
13476 * @lst: the return value for the set of parsed nodes
13477 *
13478 * Parse a well-balanced chunk of an XML document
13479 * within the context (DTD, namespaces, etc ...) of the given node.
13480 *
13481 * The allowed sequence for the data is a Well Balanced Chunk defined by
13482 * the content production in the XML grammar:
13483 *
13484 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13485 *
13486 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13487 * error code otherwise
13488 */
13489xmlParserErrors
13490xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13491 int options, xmlNodePtr *lst) {
13492#ifdef SAX2
13493 xmlParserCtxtPtr ctxt;
13494 xmlDocPtr doc = NULL;
13495 xmlNodePtr fake, cur;
13496 int nsnr = 0;
13497
13498 xmlParserErrors ret = XML_ERR_OK;
13499
13500 /*
13501 * check all input parameters, grab the document
13502 */
13503 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13504 return(XML_ERR_INTERNAL_ERROR);
13505 switch (node->type) {
13506 case XML_ELEMENT_NODE:
13507 case XML_ATTRIBUTE_NODE:
13508 case XML_TEXT_NODE:
13509 case XML_CDATA_SECTION_NODE:
13510 case XML_ENTITY_REF_NODE:
13511 case XML_PI_NODE:
13512 case XML_COMMENT_NODE:
13513 case XML_DOCUMENT_NODE:
13514 case XML_HTML_DOCUMENT_NODE:
13515 break;
13516 default:
13517 return(XML_ERR_INTERNAL_ERROR);
13518
13519 }
13520 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13521 (node->type != XML_DOCUMENT_NODE) &&
13522 (node->type != XML_HTML_DOCUMENT_NODE))
13523 node = node->parent;
13524 if (node == NULL)
13525 return(XML_ERR_INTERNAL_ERROR);
13526 if (node->type == XML_ELEMENT_NODE)
13527 doc = node->doc;
13528 else
13529 doc = (xmlDocPtr) node;
13530 if (doc == NULL)
13531 return(XML_ERR_INTERNAL_ERROR);
13532
13533 /*
13534 * allocate a context and set-up everything not related to the
13535 * node position in the tree
13536 */
13537 if (doc->type == XML_DOCUMENT_NODE)
13538 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13539#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013540 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013541 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013542 /*
13543 * When parsing in context, it makes no sense to add implied
13544 * elements like html/body/etc...
13545 */
13546 options |= HTML_PARSE_NOIMPLIED;
13547 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013548#endif
13549 else
13550 return(XML_ERR_INTERNAL_ERROR);
13551
13552 if (ctxt == NULL)
13553 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013554
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013555 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013556 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13557 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13558 * we must wait until the last moment to free the original one.
13559 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013560 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013561 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013562 xmlDictFree(ctxt->dict);
13563 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013564 } else
13565 options |= XML_PARSE_NODICT;
13566
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013567 if (doc->encoding != NULL) {
13568 xmlCharEncodingHandlerPtr hdlr;
13569
13570 if (ctxt->encoding != NULL)
13571 xmlFree((xmlChar *) ctxt->encoding);
13572 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13573
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013574 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013575 if (hdlr != NULL) {
13576 xmlSwitchToEncoding(ctxt, hdlr);
13577 } else {
13578 return(XML_ERR_UNSUPPORTED_ENCODING);
13579 }
13580 }
13581
Daniel Veillard37334572008-07-31 08:20:02 +000013582 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013583 xmlDetectSAX2(ctxt);
13584 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013585 /* parsing in context, i.e. as within existing content */
Daniel Veillardad88b542017-12-08 09:42:31 +010013586 ctxt->input_id = 2;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013587 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013588
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013589 fake = xmlNewComment(NULL);
13590 if (fake == NULL) {
13591 xmlFreeParserCtxt(ctxt);
13592 return(XML_ERR_NO_MEMORY);
13593 }
13594 xmlAddChild(node, fake);
13595
Daniel Veillard29b17482004-08-16 00:39:03 +000013596 if (node->type == XML_ELEMENT_NODE) {
13597 nodePush(ctxt, node);
13598 /*
13599 * initialize the SAX2 namespaces stack
13600 */
13601 cur = node;
13602 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13603 xmlNsPtr ns = cur->nsDef;
13604 const xmlChar *iprefix, *ihref;
13605
13606 while (ns != NULL) {
13607 if (ctxt->dict) {
13608 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13609 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13610 } else {
13611 iprefix = ns->prefix;
13612 ihref = ns->href;
13613 }
13614
13615 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13616 nsPush(ctxt, iprefix, ihref);
13617 nsnr++;
13618 }
13619 ns = ns->next;
13620 }
13621 cur = cur->parent;
13622 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013623 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013624
13625 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13626 /*
13627 * ID/IDREF registration will be done in xmlValidateElement below
13628 */
13629 ctxt->loadsubset |= XML_SKIP_IDS;
13630 }
13631
Daniel Veillard499cc922006-01-18 17:22:35 +000013632#ifdef LIBXML_HTML_ENABLED
13633 if (doc->type == XML_HTML_DOCUMENT_NODE)
13634 __htmlParseContent(ctxt);
13635 else
13636#endif
13637 xmlParseContent(ctxt);
13638
Daniel Veillard29b17482004-08-16 00:39:03 +000013639 nsPop(ctxt, nsnr);
13640 if ((RAW == '<') && (NXT(1) == '/')) {
13641 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13642 } else if (RAW != 0) {
13643 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13644 }
13645 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13646 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13647 ctxt->wellFormed = 0;
13648 }
13649
13650 if (!ctxt->wellFormed) {
13651 if (ctxt->errNo == 0)
13652 ret = XML_ERR_INTERNAL_ERROR;
13653 else
13654 ret = (xmlParserErrors)ctxt->errNo;
13655 } else {
13656 ret = XML_ERR_OK;
13657 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013658
Daniel Veillard29b17482004-08-16 00:39:03 +000013659 /*
13660 * Return the newly created nodeset after unlinking it from
13661 * the pseudo sibling.
13662 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013663
Daniel Veillard29b17482004-08-16 00:39:03 +000013664 cur = fake->next;
13665 fake->next = NULL;
13666 node->last = fake;
13667
13668 if (cur != NULL) {
13669 cur->prev = NULL;
13670 }
13671
13672 *lst = cur;
13673
13674 while (cur != NULL) {
13675 cur->parent = NULL;
13676 cur = cur->next;
13677 }
13678
13679 xmlUnlinkNode(fake);
13680 xmlFreeNode(fake);
13681
13682
13683 if (ret != XML_ERR_OK) {
13684 xmlFreeNodeList(*lst);
13685 *lst = NULL;
13686 }
William M. Brackc3f81342004-10-03 01:22:44 +000013687
William M. Brackb7b54de2004-10-06 16:38:01 +000013688 if (doc->dict != NULL)
13689 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013690 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013691
Daniel Veillard29b17482004-08-16 00:39:03 +000013692 return(ret);
13693#else /* !SAX2 */
13694 return(XML_ERR_INTERNAL_ERROR);
13695#endif
13696}
13697
Daniel Veillard81273902003-09-30 00:43:48 +000013698#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013699/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013700 * xmlParseBalancedChunkMemoryRecover:
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013701 * @doc: the document the chunk pertains to (must not be NULL)
13702 * @sax: the SAX handler block (possibly NULL)
Daniel Veillard58e44c92002-08-02 22:19:49 +000013703 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13704 * @depth: Used for loop detection, use 0
13705 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13706 * @lst: the return value for the set of parsed nodes
13707 * @recover: return nodes even if the data is broken (use 0)
13708 *
13709 *
13710 * Parse a well-balanced chunk of an XML document
13711 * called by the parser
13712 * The allowed sequence for the Well Balanced Chunk is the one defined by
13713 * the content production in the XML grammar:
13714 *
13715 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13716 *
13717 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13718 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013719 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013720 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013721 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13722 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013723 */
13724int
13725xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013726 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013727 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013728 xmlParserCtxtPtr ctxt;
13729 xmlDocPtr newDoc;
13730 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013731 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013732 int size;
13733 int ret = 0;
13734
Daniel Veillard0161e632008-08-28 15:36:32 +000013735 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013736 return(XML_ERR_ENTITY_LOOP);
13737 }
13738
13739
Daniel Veillardcda96922001-08-21 10:56:31 +000013740 if (lst != NULL)
13741 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013742 if (string == NULL)
13743 return(-1);
13744
13745 size = xmlStrlen(string);
13746
13747 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13748 if (ctxt == NULL) return(-1);
13749 ctxt->userData = ctxt;
13750 if (sax != NULL) {
13751 oldsax = ctxt->sax;
13752 ctxt->sax = sax;
13753 if (user_data != NULL)
13754 ctxt->userData = user_data;
13755 }
13756 newDoc = xmlNewDoc(BAD_CAST "1.0");
13757 if (newDoc == NULL) {
13758 xmlFreeParserCtxt(ctxt);
13759 return(-1);
13760 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013761 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013762 if ((doc != NULL) && (doc->dict != NULL)) {
13763 xmlDictFree(ctxt->dict);
13764 ctxt->dict = doc->dict;
13765 xmlDictReference(ctxt->dict);
13766 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13767 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13768 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13769 ctxt->dictNames = 1;
13770 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013771 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013772 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013773 /* doc == NULL is only supported for historic reasons */
Owen Taylor3473f882001-02-23 17:55:21 +000013774 if (doc != NULL) {
13775 newDoc->intSubset = doc->intSubset;
13776 newDoc->extSubset = doc->extSubset;
13777 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013778 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13779 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013780 if (sax != NULL)
13781 ctxt->sax = oldsax;
13782 xmlFreeParserCtxt(ctxt);
13783 newDoc->intSubset = NULL;
13784 newDoc->extSubset = NULL;
13785 xmlFreeDoc(newDoc);
13786 return(-1);
13787 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013788 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13789 nodePush(ctxt, newRoot);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013790 /* doc == NULL is only supported for historic reasons */
Owen Taylor3473f882001-02-23 17:55:21 +000013791 if (doc == NULL) {
13792 ctxt->myDoc = newDoc;
13793 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013794 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013795 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013796 /* Ensure that doc has XML spec namespace */
13797 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13798 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013799 }
13800 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardad88b542017-12-08 09:42:31 +010013801 ctxt->input_id = 2;
Owen Taylor3473f882001-02-23 17:55:21 +000013802 ctxt->depth = depth;
13803
13804 /*
13805 * Doing validity checking on chunk doesn't make sense
13806 */
13807 ctxt->validate = 0;
13808 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013809 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013810
Daniel Veillardb39bc392002-10-26 19:29:51 +000013811 if ( doc != NULL ){
13812 content = doc->children;
13813 doc->children = NULL;
13814 xmlParseContent(ctxt);
13815 doc->children = content;
13816 }
13817 else {
13818 xmlParseContent(ctxt);
13819 }
Owen Taylor3473f882001-02-23 17:55:21 +000013820 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013821 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013822 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013823 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013824 }
13825 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013826 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013827 }
13828
13829 if (!ctxt->wellFormed) {
13830 if (ctxt->errNo == 0)
13831 ret = 1;
13832 else
13833 ret = ctxt->errNo;
13834 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013835 ret = 0;
13836 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013837
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013838 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13839 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013840
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013841 /*
13842 * Return the newly created nodeset after unlinking it from
13843 * they pseudo parent.
13844 */
13845 cur = newDoc->children->children;
13846 *lst = cur;
13847 while (cur != NULL) {
13848 xmlSetTreeDoc(cur, doc);
13849 cur->parent = NULL;
13850 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013851 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013852 newDoc->children->children = NULL;
13853 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013854
13855 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013856 ctxt->sax = oldsax;
13857 xmlFreeParserCtxt(ctxt);
13858 newDoc->intSubset = NULL;
13859 newDoc->extSubset = NULL;
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013860 /* This leaks the namespace list if doc == NULL */
Rob Richardsa02f1992006-09-16 14:04:26 +000013861 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013862 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013863
Owen Taylor3473f882001-02-23 17:55:21 +000013864 return(ret);
13865}
13866
13867/**
13868 * xmlSAXParseEntity:
13869 * @sax: the SAX handler block
13870 * @filename: the filename
13871 *
13872 * parse an XML external entity out of context and build a tree.
13873 * It use the given SAX function block to handle the parsing callback.
13874 * If sax is NULL, fallback to the default DOM tree building routines.
13875 *
13876 * [78] extParsedEnt ::= TextDecl? content
13877 *
13878 * This correspond to a "Well Balanced" chunk
13879 *
13880 * Returns the resulting document tree
13881 */
13882
13883xmlDocPtr
13884xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13885 xmlDocPtr ret;
13886 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013887
13888 ctxt = xmlCreateFileParserCtxt(filename);
13889 if (ctxt == NULL) {
13890 return(NULL);
13891 }
13892 if (sax != NULL) {
13893 if (ctxt->sax != NULL)
13894 xmlFree(ctxt->sax);
13895 ctxt->sax = sax;
13896 ctxt->userData = NULL;
13897 }
13898
Owen Taylor3473f882001-02-23 17:55:21 +000013899 xmlParseExtParsedEnt(ctxt);
13900
13901 if (ctxt->wellFormed)
13902 ret = ctxt->myDoc;
13903 else {
13904 ret = NULL;
13905 xmlFreeDoc(ctxt->myDoc);
13906 ctxt->myDoc = NULL;
13907 }
13908 if (sax != NULL)
13909 ctxt->sax = NULL;
13910 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013911
Owen Taylor3473f882001-02-23 17:55:21 +000013912 return(ret);
13913}
13914
13915/**
13916 * xmlParseEntity:
13917 * @filename: the filename
13918 *
13919 * parse an XML external entity out of context and build a tree.
13920 *
13921 * [78] extParsedEnt ::= TextDecl? content
13922 *
13923 * This correspond to a "Well Balanced" chunk
13924 *
13925 * Returns the resulting document tree
13926 */
13927
13928xmlDocPtr
13929xmlParseEntity(const char *filename) {
13930 return(xmlSAXParseEntity(NULL, filename));
13931}
Daniel Veillard81273902003-09-30 00:43:48 +000013932#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013933
13934/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013935 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013936 * @URL: the entity URL
13937 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013938 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013939 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013940 *
13941 * Create a parser context for an external entity
13942 * Automatic support for ZLIB/Compress compressed document is provided
13943 * by default if found at compile-time.
13944 *
13945 * Returns the new parser context or NULL
13946 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013947static xmlParserCtxtPtr
13948xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13949 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013950 xmlParserCtxtPtr ctxt;
13951 xmlParserInputPtr inputStream;
13952 char *directory = NULL;
13953 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013954
Owen Taylor3473f882001-02-23 17:55:21 +000013955 ctxt = xmlNewParserCtxt();
13956 if (ctxt == NULL) {
13957 return(NULL);
13958 }
13959
Daniel Veillard48247b42009-07-10 16:12:46 +020013960 if (pctx != NULL) {
13961 ctxt->options = pctx->options;
13962 ctxt->_private = pctx->_private;
Daniel Veillardad88b542017-12-08 09:42:31 +010013963 /*
13964 * this is a subparser of pctx, so the input_id should be
13965 * incremented to distinguish from main entity
13966 */
13967 ctxt->input_id = pctx->input_id + 1;
Rob Richards9c0aa472009-03-26 18:10:19 +000013968 }
13969
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013970 /* Don't read from stdin. */
13971 if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13972 URL = BAD_CAST "./-";
13973
Owen Taylor3473f882001-02-23 17:55:21 +000013974 uri = xmlBuildURI(URL, base);
13975
13976 if (uri == NULL) {
13977 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13978 if (inputStream == NULL) {
13979 xmlFreeParserCtxt(ctxt);
13980 return(NULL);
13981 }
13982
13983 inputPush(ctxt, inputStream);
13984
13985 if ((ctxt->directory == NULL) && (directory == NULL))
13986 directory = xmlParserGetDirectory((char *)URL);
13987 if ((ctxt->directory == NULL) && (directory != NULL))
13988 ctxt->directory = directory;
13989 } else {
13990 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13991 if (inputStream == NULL) {
13992 xmlFree(uri);
13993 xmlFreeParserCtxt(ctxt);
13994 return(NULL);
13995 }
13996
13997 inputPush(ctxt, inputStream);
13998
13999 if ((ctxt->directory == NULL) && (directory == NULL))
14000 directory = xmlParserGetDirectory((char *)uri);
14001 if ((ctxt->directory == NULL) && (directory != NULL))
14002 ctxt->directory = directory;
14003 xmlFree(uri);
14004 }
Owen Taylor3473f882001-02-23 17:55:21 +000014005 return(ctxt);
14006}
14007
Rob Richards9c0aa472009-03-26 18:10:19 +000014008/**
14009 * xmlCreateEntityParserCtxt:
14010 * @URL: the entity URL
14011 * @ID: the entity PUBLIC ID
14012 * @base: a possible base for the target URI
14013 *
14014 * Create a parser context for an external entity
14015 * Automatic support for ZLIB/Compress compressed document is provided
14016 * by default if found at compile-time.
14017 *
14018 * Returns the new parser context or NULL
14019 */
14020xmlParserCtxtPtr
14021xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14022 const xmlChar *base) {
14023 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14024
14025}
14026
Owen Taylor3473f882001-02-23 17:55:21 +000014027/************************************************************************
14028 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014029 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014030 * *
14031 ************************************************************************/
14032
14033/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014034 * xmlCreateURLParserCtxt:
14035 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014036 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014037 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014038 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014039 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014040 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014041 *
14042 * Returns the new parser context or NULL
14043 */
14044xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014045xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014046{
14047 xmlParserCtxtPtr ctxt;
14048 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014049 char *directory = NULL;
14050
Owen Taylor3473f882001-02-23 17:55:21 +000014051 ctxt = xmlNewParserCtxt();
14052 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014053 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014054 return(NULL);
14055 }
14056
Daniel Veillarddf292f72005-01-16 19:00:15 +000014057 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014058 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014059 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014060
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014061 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014062 if (inputStream == NULL) {
14063 xmlFreeParserCtxt(ctxt);
14064 return(NULL);
14065 }
14066
Owen Taylor3473f882001-02-23 17:55:21 +000014067 inputPush(ctxt, inputStream);
14068 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014069 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014070 if ((ctxt->directory == NULL) && (directory != NULL))
14071 ctxt->directory = directory;
14072
14073 return(ctxt);
14074}
14075
Daniel Veillard61b93382003-11-03 14:28:31 +000014076/**
14077 * xmlCreateFileParserCtxt:
14078 * @filename: the filename
14079 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014080 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014081 * Automatic support for ZLIB/Compress compressed document is provided
14082 * by default if found at compile-time.
14083 *
14084 * Returns the new parser context or NULL
14085 */
14086xmlParserCtxtPtr
14087xmlCreateFileParserCtxt(const char *filename)
14088{
14089 return(xmlCreateURLParserCtxt(filename, 0));
14090}
14091
Daniel Veillard81273902003-09-30 00:43:48 +000014092#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014093/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014094 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014095 * @sax: the SAX handler block
14096 * @filename: the filename
14097 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14098 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014099 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014100 *
14101 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14102 * compressed document is provided by default if found at compile-time.
14103 * It use the given SAX function block to handle the parsing callback.
14104 * If sax is NULL, fallback to the default DOM tree building routines.
14105 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014106 * User data (void *) is stored within the parser context in the
14107 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014108 *
Owen Taylor3473f882001-02-23 17:55:21 +000014109 * Returns the resulting document tree
14110 */
14111
14112xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014113xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14114 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014115 xmlDocPtr ret;
14116 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014117
Daniel Veillard635ef722001-10-29 11:48:19 +000014118 xmlInitParser();
14119
Owen Taylor3473f882001-02-23 17:55:21 +000014120 ctxt = xmlCreateFileParserCtxt(filename);
14121 if (ctxt == NULL) {
14122 return(NULL);
14123 }
14124 if (sax != NULL) {
14125 if (ctxt->sax != NULL)
14126 xmlFree(ctxt->sax);
14127 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014128 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014129 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014130 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014131 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014132 }
Owen Taylor3473f882001-02-23 17:55:21 +000014133
Daniel Veillard37d2d162008-03-14 10:54:00 +000014134 if (ctxt->directory == NULL)
14135 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014136
Daniel Veillarddad3f682002-11-17 16:47:27 +000014137 ctxt->recovery = recovery;
14138
Owen Taylor3473f882001-02-23 17:55:21 +000014139 xmlParseDocument(ctxt);
14140
William M. Brackc07329e2003-09-08 01:57:30 +000014141 if ((ctxt->wellFormed) || recovery) {
14142 ret = ctxt->myDoc;
Haibo Huangd23e46c2020-10-28 22:26:09 -070014143 if ((ret != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014144 if (ctxt->input->buf->compressed > 0)
14145 ret->compression = 9;
14146 else
14147 ret->compression = ctxt->input->buf->compressed;
14148 }
William M. Brackc07329e2003-09-08 01:57:30 +000014149 }
Owen Taylor3473f882001-02-23 17:55:21 +000014150 else {
14151 ret = NULL;
14152 xmlFreeDoc(ctxt->myDoc);
14153 ctxt->myDoc = NULL;
14154 }
14155 if (sax != NULL)
14156 ctxt->sax = NULL;
14157 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014158
Owen Taylor3473f882001-02-23 17:55:21 +000014159 return(ret);
14160}
14161
14162/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014163 * xmlSAXParseFile:
14164 * @sax: the SAX handler block
14165 * @filename: the filename
14166 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14167 * documents
14168 *
14169 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14170 * compressed document is provided by default if found at compile-time.
14171 * It use the given SAX function block to handle the parsing callback.
14172 * If sax is NULL, fallback to the default DOM tree building routines.
14173 *
14174 * Returns the resulting document tree
14175 */
14176
14177xmlDocPtr
14178xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14179 int recovery) {
14180 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14181}
14182
14183/**
Owen Taylor3473f882001-02-23 17:55:21 +000014184 * xmlRecoverDoc:
14185 * @cur: a pointer to an array of xmlChar
14186 *
14187 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014188 * In the case the document is not Well Formed, a attempt to build a
14189 * tree is tried anyway
14190 *
14191 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014192 */
14193
14194xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014195xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014196 return(xmlSAXParseDoc(NULL, cur, 1));
14197}
14198
14199/**
14200 * xmlParseFile:
14201 * @filename: the filename
14202 *
14203 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14204 * compressed document is provided by default if found at compile-time.
14205 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014206 * Returns the resulting document tree if the file was wellformed,
14207 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014208 */
14209
14210xmlDocPtr
14211xmlParseFile(const char *filename) {
14212 return(xmlSAXParseFile(NULL, filename, 0));
14213}
14214
14215/**
14216 * xmlRecoverFile:
14217 * @filename: the filename
14218 *
14219 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14220 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014221 * In the case the document is not Well Formed, it attempts to build
14222 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014223 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014224 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014225 */
14226
14227xmlDocPtr
14228xmlRecoverFile(const char *filename) {
14229 return(xmlSAXParseFile(NULL, filename, 1));
14230}
14231
14232
14233/**
14234 * xmlSetupParserForBuffer:
14235 * @ctxt: an XML parser context
14236 * @buffer: a xmlChar * buffer
14237 * @filename: a file name
14238 *
14239 * Setup the parser context to parse a new buffer; Clears any prior
14240 * contents from the parser context. The buffer parameter must not be
14241 * NULL, but the filename parameter can be
14242 */
14243void
14244xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14245 const char* filename)
14246{
14247 xmlParserInputPtr input;
14248
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014249 if ((ctxt == NULL) || (buffer == NULL))
14250 return;
14251
Owen Taylor3473f882001-02-23 17:55:21 +000014252 input = xmlNewInputStream(ctxt);
14253 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014254 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014255 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014256 return;
14257 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014258
Owen Taylor3473f882001-02-23 17:55:21 +000014259 xmlClearParserCtxt(ctxt);
14260 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014261 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014262 input->base = buffer;
14263 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014264 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014265 inputPush(ctxt, input);
14266}
14267
14268/**
14269 * xmlSAXUserParseFile:
14270 * @sax: a SAX handler
14271 * @user_data: The user data returned on SAX callbacks
14272 * @filename: a file name
14273 *
14274 * parse an XML file and call the given SAX handler routines.
14275 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014276 *
Owen Taylor3473f882001-02-23 17:55:21 +000014277 * Returns 0 in case of success or a error number otherwise
14278 */
14279int
14280xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14281 const char *filename) {
14282 int ret = 0;
14283 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014284
Owen Taylor3473f882001-02-23 17:55:21 +000014285 ctxt = xmlCreateFileParserCtxt(filename);
14286 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014287 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014288 xmlFree(ctxt->sax);
14289 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014290 xmlDetectSAX2(ctxt);
14291
Owen Taylor3473f882001-02-23 17:55:21 +000014292 if (user_data != NULL)
14293 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014294
Owen Taylor3473f882001-02-23 17:55:21 +000014295 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014296
Owen Taylor3473f882001-02-23 17:55:21 +000014297 if (ctxt->wellFormed)
14298 ret = 0;
14299 else {
14300 if (ctxt->errNo != 0)
14301 ret = ctxt->errNo;
14302 else
14303 ret = -1;
14304 }
14305 if (sax != NULL)
14306 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014307 if (ctxt->myDoc != NULL) {
14308 xmlFreeDoc(ctxt->myDoc);
14309 ctxt->myDoc = NULL;
14310 }
Owen Taylor3473f882001-02-23 17:55:21 +000014311 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014312
Owen Taylor3473f882001-02-23 17:55:21 +000014313 return ret;
14314}
Daniel Veillard81273902003-09-30 00:43:48 +000014315#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014316
14317/************************************************************************
14318 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014319 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014320 * *
14321 ************************************************************************/
14322
14323/**
14324 * xmlCreateMemoryParserCtxt:
14325 * @buffer: a pointer to a char array
14326 * @size: the size of the array
14327 *
14328 * Create a parser context for an XML in-memory document.
14329 *
14330 * Returns the new parser context or NULL
14331 */
14332xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014333xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014334 xmlParserCtxtPtr ctxt;
14335 xmlParserInputPtr input;
14336 xmlParserInputBufferPtr buf;
14337
14338 if (buffer == NULL)
14339 return(NULL);
14340 if (size <= 0)
14341 return(NULL);
14342
14343 ctxt = xmlNewParserCtxt();
14344 if (ctxt == NULL)
14345 return(NULL);
14346
Daniel Veillard53350552003-09-18 13:35:51 +000014347 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014348 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014349 if (buf == NULL) {
14350 xmlFreeParserCtxt(ctxt);
14351 return(NULL);
14352 }
Owen Taylor3473f882001-02-23 17:55:21 +000014353
14354 input = xmlNewInputStream(ctxt);
14355 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014356 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014357 xmlFreeParserCtxt(ctxt);
14358 return(NULL);
14359 }
14360
14361 input->filename = NULL;
14362 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014363 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014364
14365 inputPush(ctxt, input);
14366 return(ctxt);
14367}
14368
Daniel Veillard81273902003-09-30 00:43:48 +000014369#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014370/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014371 * xmlSAXParseMemoryWithData:
14372 * @sax: the SAX handler block
14373 * @buffer: an pointer to a char array
14374 * @size: the size of the array
14375 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14376 * documents
14377 * @data: the userdata
14378 *
14379 * parse an XML in-memory block and use the given SAX function block
14380 * to handle the parsing callback. If sax is NULL, fallback to the default
14381 * DOM tree building routines.
14382 *
14383 * User data (void *) is stored within the parser context in the
14384 * context's _private member, so it is available nearly everywhere in libxml
14385 *
14386 * Returns the resulting document tree
14387 */
14388
14389xmlDocPtr
14390xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14391 int size, int recovery, void *data) {
14392 xmlDocPtr ret;
14393 xmlParserCtxtPtr ctxt;
14394
Daniel Veillardab2a7632009-07-09 08:45:03 +020014395 xmlInitParser();
14396
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014397 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14398 if (ctxt == NULL) return(NULL);
14399 if (sax != NULL) {
14400 if (ctxt->sax != NULL)
14401 xmlFree(ctxt->sax);
14402 ctxt->sax = sax;
14403 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014404 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014405 if (data!=NULL) {
14406 ctxt->_private=data;
14407 }
14408
Daniel Veillardadba5f12003-04-04 16:09:01 +000014409 ctxt->recovery = recovery;
14410
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014411 xmlParseDocument(ctxt);
14412
14413 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14414 else {
14415 ret = NULL;
14416 xmlFreeDoc(ctxt->myDoc);
14417 ctxt->myDoc = NULL;
14418 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014419 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014420 ctxt->sax = NULL;
14421 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014422
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014423 return(ret);
14424}
14425
14426/**
Owen Taylor3473f882001-02-23 17:55:21 +000014427 * xmlSAXParseMemory:
14428 * @sax: the SAX handler block
14429 * @buffer: an pointer to a char array
14430 * @size: the size of the array
14431 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14432 * documents
14433 *
14434 * parse an XML in-memory block and use the given SAX function block
14435 * to handle the parsing callback. If sax is NULL, fallback to the default
14436 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014437 *
Owen Taylor3473f882001-02-23 17:55:21 +000014438 * Returns the resulting document tree
14439 */
14440xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014441xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14442 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014443 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014444}
14445
14446/**
14447 * xmlParseMemory:
14448 * @buffer: an pointer to a char array
14449 * @size: the size of the array
14450 *
14451 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014452 *
Owen Taylor3473f882001-02-23 17:55:21 +000014453 * Returns the resulting document tree
14454 */
14455
Daniel Veillard50822cb2001-07-26 20:05:51 +000014456xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014457 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14458}
14459
14460/**
14461 * xmlRecoverMemory:
14462 * @buffer: an pointer to a char array
14463 * @size: the size of the array
14464 *
14465 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014466 * In the case the document is not Well Formed, an attempt to
14467 * build a tree is tried anyway
14468 *
14469 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014470 */
14471
Daniel Veillard50822cb2001-07-26 20:05:51 +000014472xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014473 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14474}
14475
14476/**
14477 * xmlSAXUserParseMemory:
14478 * @sax: a SAX handler
14479 * @user_data: The user data returned on SAX callbacks
14480 * @buffer: an in-memory XML document input
14481 * @size: the length of the XML document in bytes
14482 *
14483 * A better SAX parsing routine.
14484 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014485 *
Owen Taylor3473f882001-02-23 17:55:21 +000014486 * Returns 0 in case of success or a error number otherwise
14487 */
14488int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014489 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014490 int ret = 0;
14491 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014492
14493 xmlInitParser();
14494
Owen Taylor3473f882001-02-23 17:55:21 +000014495 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14496 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014497 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14498 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014499 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014500 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014501
Daniel Veillard30211a02001-04-26 09:33:18 +000014502 if (user_data != NULL)
14503 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014504
Owen Taylor3473f882001-02-23 17:55:21 +000014505 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014506
Owen Taylor3473f882001-02-23 17:55:21 +000014507 if (ctxt->wellFormed)
14508 ret = 0;
14509 else {
14510 if (ctxt->errNo != 0)
14511 ret = ctxt->errNo;
14512 else
14513 ret = -1;
14514 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014515 if (sax != NULL)
14516 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014517 if (ctxt->myDoc != NULL) {
14518 xmlFreeDoc(ctxt->myDoc);
14519 ctxt->myDoc = NULL;
14520 }
Owen Taylor3473f882001-02-23 17:55:21 +000014521 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014522
Owen Taylor3473f882001-02-23 17:55:21 +000014523 return ret;
14524}
Daniel Veillard81273902003-09-30 00:43:48 +000014525#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014526
14527/**
14528 * xmlCreateDocParserCtxt:
14529 * @cur: a pointer to an array of xmlChar
14530 *
14531 * Creates a parser context for an XML in-memory document.
14532 *
14533 * Returns the new parser context or NULL
14534 */
14535xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014536xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014537 int len;
14538
14539 if (cur == NULL)
14540 return(NULL);
14541 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014542 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014543}
14544
Daniel Veillard81273902003-09-30 00:43:48 +000014545#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014546/**
14547 * xmlSAXParseDoc:
14548 * @sax: the SAX handler block
14549 * @cur: a pointer to an array of xmlChar
14550 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14551 * documents
14552 *
14553 * parse an XML in-memory document and build a tree.
14554 * It use the given SAX function block to handle the parsing callback.
14555 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014556 *
Owen Taylor3473f882001-02-23 17:55:21 +000014557 * Returns the resulting document tree
14558 */
14559
14560xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014561xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014562 xmlDocPtr ret;
14563 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014564 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014565
Daniel Veillard38936062004-11-04 17:45:11 +000014566 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014567
14568
14569 ctxt = xmlCreateDocParserCtxt(cur);
14570 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014571 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014572 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014573 ctxt->sax = sax;
14574 ctxt->userData = NULL;
14575 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014576 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014577
14578 xmlParseDocument(ctxt);
14579 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14580 else {
14581 ret = NULL;
14582 xmlFreeDoc(ctxt->myDoc);
14583 ctxt->myDoc = NULL;
14584 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014585 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014586 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014587 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014588
Owen Taylor3473f882001-02-23 17:55:21 +000014589 return(ret);
14590}
14591
14592/**
14593 * xmlParseDoc:
14594 * @cur: a pointer to an array of xmlChar
14595 *
14596 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014597 *
Owen Taylor3473f882001-02-23 17:55:21 +000014598 * Returns the resulting document tree
14599 */
14600
14601xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014602xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014603 return(xmlSAXParseDoc(NULL, cur, 0));
14604}
Daniel Veillard81273902003-09-30 00:43:48 +000014605#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014606
Daniel Veillard81273902003-09-30 00:43:48 +000014607#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014608/************************************************************************
14609 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014610 * Specific function to keep track of entities references *
14611 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014612 * *
14613 ************************************************************************/
14614
14615static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14616
14617/**
14618 * xmlAddEntityReference:
14619 * @ent : A valid entity
14620 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014621 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014622 *
14623 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14624 */
14625static void
14626xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14627 xmlNodePtr lastNode)
14628{
14629 if (xmlEntityRefFunc != NULL) {
14630 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14631 }
14632}
14633
14634
14635/**
14636 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014637 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014638 *
14639 * Set the function to call call back when a xml reference has been made
14640 */
14641void
14642xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14643{
14644 xmlEntityRefFunc = func;
14645}
Daniel Veillard81273902003-09-30 00:43:48 +000014646#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014647
14648/************************************************************************
14649 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014650 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014651 * *
14652 ************************************************************************/
14653
14654#ifdef LIBXML_XPATH_ENABLED
14655#include <libxml/xpath.h>
14656#endif
14657
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Non-zero once xmlInitParser() has completed; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Returns immediately when the library is already marked initialized.
 * Otherwise it runs the per-module initializers below and sets the
 * initialized flag.
 */

void
xmlInitParser(void) {
    /* Fast path: nothing to do once initialization has completed. */
    if (xmlParserInitialized != 0)
        return;

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    /* On these Windows builds, cleanup is scheduled for process exit. */
    atexit(xmlCleanupParser);
#endif

#ifdef LIBXML_THREAD_ENABLED
    /*
     * Threaded builds take the global init mutex and test the flag again
     * while holding it before running the initializers.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
        xmlInitThreads();
        xmlInitGlobals();
        /* Install the default generic error handler only if none was set. */
        if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
            (xmlGenericError == NULL))
            initGenericErrorDefaultFunc(NULL);
        xmlInitMemory();
        xmlInitializeDict();
        xmlInitCharEncodingHandlers();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Mark done only after every initializer above has run. */
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14708
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * The call is a no-op when the library is not marked as initialized.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing to release if xmlInitParser() never completed. */
    if (!xmlParserInitialized)
        return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlResetLastError();
    xmlCleanupGlobals();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Clear the flag so a later xmlInitParser() runs the initializers again. */
    xmlParserInitialized = 0;
}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014755
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * Destructor hook: releases the library's global memory through
 * xmlCleanupParser(), but only while xmlFree is still the C library's
 * free().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri, so skip the cleanup then.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14769
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014770/************************************************************************
14771 * *
14772 * New set (2.6.0) of simpler and more flexible APIs *
14773 * *
14774 ************************************************************************/
14775
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed. @str is evaluated more than once, so it must not
 * have side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement: it must be followed by a semicolon and is safe
 * inside an unbraced if/else.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14787
14788/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014789 * xmlCtxtReset:
14790 * @ctxt: an XML parser context
14791 *
14792 * Reset a parser context
14793 */
14794void
14795xmlCtxtReset(xmlParserCtxtPtr ctxt)
14796{
14797 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014798 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014799
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014800 if (ctxt == NULL)
14801 return;
14802
14803 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014804
14805 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14806 xmlFreeInputStream(input);
14807 }
14808 ctxt->inputNr = 0;
14809 ctxt->input = NULL;
14810
14811 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014812 if (ctxt->spaceTab != NULL) {
14813 ctxt->spaceTab[0] = -1;
14814 ctxt->space = &ctxt->spaceTab[0];
14815 } else {
14816 ctxt->space = NULL;
14817 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014818
14819
14820 ctxt->nodeNr = 0;
14821 ctxt->node = NULL;
14822
14823 ctxt->nameNr = 0;
14824 ctxt->name = NULL;
14825
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014826 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014827 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014828 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014829 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014830 DICT_FREE(ctxt->directory);
14831 ctxt->directory = NULL;
14832 DICT_FREE(ctxt->extSubURI);
14833 ctxt->extSubURI = NULL;
14834 DICT_FREE(ctxt->extSubSystem);
14835 ctxt->extSubSystem = NULL;
14836 if (ctxt->myDoc != NULL)
14837 xmlFreeDoc(ctxt->myDoc);
14838 ctxt->myDoc = NULL;
14839
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014840 ctxt->standalone = -1;
14841 ctxt->hasExternalSubset = 0;
14842 ctxt->hasPErefs = 0;
14843 ctxt->html = 0;
14844 ctxt->external = 0;
14845 ctxt->instate = XML_PARSER_START;
14846 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014847
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014848 ctxt->wellFormed = 1;
14849 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014850 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014851 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014852#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014853 ctxt->vctxt.userData = ctxt;
14854 ctxt->vctxt.error = xmlParserValidityError;
14855 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014856#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014857 ctxt->record_info = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014858 ctxt->checkIndex = 0;
14859 ctxt->inSubset = 0;
14860 ctxt->errNo = XML_ERR_OK;
14861 ctxt->depth = 0;
14862 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14863 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014864 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014865 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014866 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014867 xmlInitNodeInfoSeq(&ctxt->node_seq);
14868
14869 if (ctxt->attsDefault != NULL) {
Nick Wellnhofere03f0a12017-11-09 16:42:47 +010014870 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014871 ctxt->attsDefault = NULL;
14872 }
14873 if (ctxt->attsSpecial != NULL) {
14874 xmlHashFree(ctxt->attsSpecial, NULL);
14875 ctxt->attsSpecial = NULL;
14876 }
14877
Daniel Veillard4432df22003-09-28 18:58:27 +000014878#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014879 if (ctxt->catalogs != NULL)
14880 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014881#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014882 if (ctxt->lastError.code != XML_ERR_OK)
14883 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014884}
14885
14886/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014887 * xmlCtxtResetPush:
14888 * @ctxt: an XML parser context
14889 * @chunk: a pointer to an array of chars
14890 * @size: number of chars in the array
14891 * @filename: an optional file name or URI
14892 * @encoding: the document encoding, or NULL
14893 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014894 * Reset a push parser context
14895 *
14896 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014897 */
14898int
14899xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14900 int size, const char *filename, const char *encoding)
14901{
14902 xmlParserInputPtr inputStream;
14903 xmlParserInputBufferPtr buf;
14904 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14905
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014906 if (ctxt == NULL)
14907 return(1);
14908
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014909 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14910 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14911
14912 buf = xmlAllocParserInputBuffer(enc);
14913 if (buf == NULL)
14914 return(1);
14915
14916 if (ctxt == NULL) {
14917 xmlFreeParserInputBuffer(buf);
14918 return(1);
14919 }
14920
14921 xmlCtxtReset(ctxt);
14922
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014923 if (filename == NULL) {
14924 ctxt->directory = NULL;
14925 } else {
14926 ctxt->directory = xmlParserGetDirectory(filename);
14927 }
14928
14929 inputStream = xmlNewInputStream(ctxt);
14930 if (inputStream == NULL) {
14931 xmlFreeParserInputBuffer(buf);
14932 return(1);
14933 }
14934
14935 if (filename == NULL)
14936 inputStream->filename = NULL;
14937 else
14938 inputStream->filename = (char *)
14939 xmlCanonicPath((const xmlChar *) filename);
14940 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014941 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014942
14943 inputPush(ctxt, inputStream);
14944
14945 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14946 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014947 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14948 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014949
14950 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14951
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014952 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014953#ifdef DEBUG_PUSH
14954 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14955#endif
14956 }
14957
14958 if (encoding != NULL) {
14959 xmlCharEncodingHandlerPtr hdlr;
14960
Daniel Veillard37334572008-07-31 08:20:02 +000014961 if (ctxt->encoding != NULL)
14962 xmlFree((xmlChar *) ctxt->encoding);
14963 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14964
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014965 hdlr = xmlFindCharEncodingHandler(encoding);
14966 if (hdlr != NULL) {
14967 xmlSwitchToEncoding(ctxt, hdlr);
14968 } else {
14969 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14970 "Unsupported encoding %s\n", BAD_CAST encoding);
14971 }
14972 } else if (enc != XML_CHAR_ENCODING_NONE) {
14973 xmlSwitchEncoding(ctxt, enc);
14974 }
14975
14976 return(0);
14977}
14978
Daniel Veillard37334572008-07-31 08:20:02 +000014979
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014980/**
Daniel Veillard37334572008-07-31 08:20:02 +000014981 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014982 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014983 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014984 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014985 *
14986 * Applies the options to the parser context
14987 *
14988 * Returns 0 in case of success, the set of unknown or unimplemented options
14989 * in case of error.
14990 */
Daniel Veillard37334572008-07-31 08:20:02 +000014991static int
14992xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014993{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014994 if (ctxt == NULL)
14995 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014996 if (encoding != NULL) {
14997 if (ctxt->encoding != NULL)
14998 xmlFree((xmlChar *) ctxt->encoding);
14999 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15000 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015001 if (options & XML_PARSE_RECOVER) {
15002 ctxt->recovery = 1;
15003 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015004 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015005 } else
15006 ctxt->recovery = 0;
15007 if (options & XML_PARSE_DTDLOAD) {
15008 ctxt->loadsubset = XML_DETECT_IDS;
15009 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015010 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015011 } else
15012 ctxt->loadsubset = 0;
15013 if (options & XML_PARSE_DTDATTR) {
15014 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15015 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015016 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015017 }
15018 if (options & XML_PARSE_NOENT) {
15019 ctxt->replaceEntities = 1;
15020 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15021 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015022 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015023 } else
15024 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015025 if (options & XML_PARSE_PEDANTIC) {
15026 ctxt->pedantic = 1;
15027 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015028 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015029 } else
15030 ctxt->pedantic = 0;
15031 if (options & XML_PARSE_NOBLANKS) {
15032 ctxt->keepBlanks = 0;
15033 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15034 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015035 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015036 } else
15037 ctxt->keepBlanks = 1;
15038 if (options & XML_PARSE_DTDVALID) {
15039 ctxt->validate = 1;
15040 if (options & XML_PARSE_NOWARNING)
15041 ctxt->vctxt.warning = NULL;
15042 if (options & XML_PARSE_NOERROR)
15043 ctxt->vctxt.error = NULL;
15044 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015045 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015046 } else
15047 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015048 if (options & XML_PARSE_NOWARNING) {
15049 ctxt->sax->warning = NULL;
15050 options -= XML_PARSE_NOWARNING;
15051 }
15052 if (options & XML_PARSE_NOERROR) {
15053 ctxt->sax->error = NULL;
15054 ctxt->sax->fatalError = NULL;
15055 options -= XML_PARSE_NOERROR;
15056 }
Daniel Veillard81273902003-09-30 00:43:48 +000015057#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015058 if (options & XML_PARSE_SAX1) {
15059 ctxt->sax->startElement = xmlSAX2StartElement;
15060 ctxt->sax->endElement = xmlSAX2EndElement;
15061 ctxt->sax->startElementNs = NULL;
15062 ctxt->sax->endElementNs = NULL;
15063 ctxt->sax->initialized = 1;
15064 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015065 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015066 }
Daniel Veillard81273902003-09-30 00:43:48 +000015067#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015068 if (options & XML_PARSE_NODICT) {
15069 ctxt->dictNames = 0;
15070 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015071 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015072 } else {
15073 ctxt->dictNames = 1;
15074 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015075 if (options & XML_PARSE_NOCDATA) {
15076 ctxt->sax->cdataBlock = NULL;
15077 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015078 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015079 }
15080 if (options & XML_PARSE_NSCLEAN) {
15081 ctxt->options |= XML_PARSE_NSCLEAN;
15082 options -= XML_PARSE_NSCLEAN;
15083 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015084 if (options & XML_PARSE_NONET) {
15085 ctxt->options |= XML_PARSE_NONET;
15086 options -= XML_PARSE_NONET;
15087 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015088 if (options & XML_PARSE_COMPACT) {
15089 ctxt->options |= XML_PARSE_COMPACT;
15090 options -= XML_PARSE_COMPACT;
15091 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015092 if (options & XML_PARSE_OLD10) {
15093 ctxt->options |= XML_PARSE_OLD10;
15094 options -= XML_PARSE_OLD10;
15095 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015096 if (options & XML_PARSE_NOBASEFIX) {
15097 ctxt->options |= XML_PARSE_NOBASEFIX;
15098 options -= XML_PARSE_NOBASEFIX;
15099 }
15100 if (options & XML_PARSE_HUGE) {
15101 ctxt->options |= XML_PARSE_HUGE;
15102 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015103 if (ctxt->dict != NULL)
15104 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015105 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015106 if (options & XML_PARSE_OLDSAX) {
15107 ctxt->options |= XML_PARSE_OLDSAX;
15108 options -= XML_PARSE_OLDSAX;
15109 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015110 if (options & XML_PARSE_IGNORE_ENC) {
15111 ctxt->options |= XML_PARSE_IGNORE_ENC;
15112 options -= XML_PARSE_IGNORE_ENC;
15113 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015114 if (options & XML_PARSE_BIG_LINES) {
15115 ctxt->options |= XML_PARSE_BIG_LINES;
15116 options -= XML_PARSE_BIG_LINES;
15117 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015118 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015119 return (options);
15120}
15121
15122/**
Daniel Veillard37334572008-07-31 08:20:02 +000015123 * xmlCtxtUseOptions:
15124 * @ctxt: an XML parser context
15125 * @options: a combination of xmlParserOption
15126 *
15127 * Applies the options to the parser context
15128 *
15129 * Returns 0 in case of success, the set of unknown or unimplemented options
15130 * in case of error.
15131 */
15132int
15133xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15134{
15135 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15136}
15137
15138/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015139 * xmlDoRead:
15140 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015141 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015142 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015143 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015144 * @reuse: keep the context for reuse
15145 *
15146 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015147 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015148 * Returns the resulting document tree or NULL
15149 */
15150static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015151xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15152 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015153{
15154 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015155
15156 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015157 if (encoding != NULL) {
15158 xmlCharEncodingHandlerPtr hdlr;
15159
15160 hdlr = xmlFindCharEncodingHandler(encoding);
15161 if (hdlr != NULL)
15162 xmlSwitchToEncoding(ctxt, hdlr);
15163 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015164 if ((URL != NULL) && (ctxt->input != NULL) &&
15165 (ctxt->input->filename == NULL))
15166 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015167 xmlParseDocument(ctxt);
15168 if ((ctxt->wellFormed) || ctxt->recovery)
15169 ret = ctxt->myDoc;
15170 else {
15171 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015172 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015173 xmlFreeDoc(ctxt->myDoc);
15174 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015175 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015176 ctxt->myDoc = NULL;
15177 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015178 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015179 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015180
15181 return (ret);
15182}
15183
15184/**
15185 * xmlReadDoc:
15186 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015187 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015188 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015189 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015190 *
15191 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015192 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015193 * Returns the resulting document tree
15194 */
15195xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015196xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015197{
15198 xmlParserCtxtPtr ctxt;
15199
15200 if (cur == NULL)
15201 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015202 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015203
15204 ctxt = xmlCreateDocParserCtxt(cur);
15205 if (ctxt == NULL)
15206 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015207 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015208}
15209
15210/**
15211 * xmlReadFile:
15212 * @filename: a file or URL
15213 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015214 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015215 *
15216 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015217 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015218 * Returns the resulting document tree
15219 */
15220xmlDocPtr
15221xmlReadFile(const char *filename, const char *encoding, int options)
15222{
15223 xmlParserCtxtPtr ctxt;
15224
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015225 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015226 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015227 if (ctxt == NULL)
15228 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015229 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015230}
15231
15232/**
15233 * xmlReadMemory:
15234 * @buffer: a pointer to a char array
15235 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015236 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015237 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015238 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015239 *
15240 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015241 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015242 * Returns the resulting document tree
15243 */
15244xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015245xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015246{
15247 xmlParserCtxtPtr ctxt;
15248
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015249 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015250 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15251 if (ctxt == NULL)
15252 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015253 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015254}
15255
15256/**
15257 * xmlReadFd:
15258 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015259 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015260 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015261 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015262 *
15263 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015264 * NOTE that the file descriptor will not be closed when the
15265 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015266 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015267 * Returns the resulting document tree
15268 */
15269xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015270xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015271{
15272 xmlParserCtxtPtr ctxt;
15273 xmlParserInputBufferPtr input;
15274 xmlParserInputPtr stream;
15275
15276 if (fd < 0)
15277 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015278 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015279
15280 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15281 if (input == NULL)
15282 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015283 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015284 ctxt = xmlNewParserCtxt();
15285 if (ctxt == NULL) {
15286 xmlFreeParserInputBuffer(input);
15287 return (NULL);
15288 }
15289 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15290 if (stream == NULL) {
15291 xmlFreeParserInputBuffer(input);
15292 xmlFreeParserCtxt(ctxt);
15293 return (NULL);
15294 }
15295 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015296 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015297}
15298
15299/**
15300 * xmlReadIO:
15301 * @ioread: an I/O read function
15302 * @ioclose: an I/O close function
15303 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015304 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015305 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015306 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015307 *
15308 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015309 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015310 * Returns the resulting document tree
15311 */
15312xmlDocPtr
15313xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015314 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015315{
15316 xmlParserCtxtPtr ctxt;
15317 xmlParserInputBufferPtr input;
15318 xmlParserInputPtr stream;
15319
15320 if (ioread == NULL)
15321 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015322 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015323
15324 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15325 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015326 if (input == NULL) {
15327 if (ioclose != NULL)
15328 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015329 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015330 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015331 ctxt = xmlNewParserCtxt();
15332 if (ctxt == NULL) {
15333 xmlFreeParserInputBuffer(input);
15334 return (NULL);
15335 }
15336 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15337 if (stream == NULL) {
15338 xmlFreeParserInputBuffer(input);
15339 xmlFreeParserCtxt(ctxt);
15340 return (NULL);
15341 }
15342 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015343 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015344}
15345
15346/**
15347 * xmlCtxtReadDoc:
15348 * @ctxt: an XML parser context
15349 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015350 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015351 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015352 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015353 *
15354 * parse an XML in-memory document and build a tree.
15355 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015356 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015357 * Returns the resulting document tree
15358 */
15359xmlDocPtr
15360xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015361 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015362{
15363 xmlParserInputPtr stream;
15364
15365 if (cur == NULL)
15366 return (NULL);
15367 if (ctxt == NULL)
15368 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015369 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015370
15371 xmlCtxtReset(ctxt);
15372
15373 stream = xmlNewStringInputStream(ctxt, cur);
15374 if (stream == NULL) {
15375 return (NULL);
15376 }
15377 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015378 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015379}
15380
15381/**
15382 * xmlCtxtReadFile:
15383 * @ctxt: an XML parser context
15384 * @filename: a file or URL
15385 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015386 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015387 *
15388 * parse an XML file from the filesystem or the network.
15389 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015390 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015391 * Returns the resulting document tree
15392 */
15393xmlDocPtr
15394xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15395 const char *encoding, int options)
15396{
15397 xmlParserInputPtr stream;
15398
15399 if (filename == NULL)
15400 return (NULL);
15401 if (ctxt == NULL)
15402 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015403 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015404
15405 xmlCtxtReset(ctxt);
15406
Daniel Veillard29614c72004-11-26 10:47:26 +000015407 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015408 if (stream == NULL) {
15409 return (NULL);
15410 }
15411 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015412 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015413}
15414
15415/**
15416 * xmlCtxtReadMemory:
15417 * @ctxt: an XML parser context
15418 * @buffer: a pointer to a char array
15419 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015420 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015421 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015422 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015423 *
15424 * parse an XML in-memory document and build a tree.
15425 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015426 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015427 * Returns the resulting document tree
15428 */
15429xmlDocPtr
15430xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015431 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015432{
15433 xmlParserInputBufferPtr input;
15434 xmlParserInputPtr stream;
15435
15436 if (ctxt == NULL)
15437 return (NULL);
15438 if (buffer == NULL)
15439 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015440 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015441
15442 xmlCtxtReset(ctxt);
15443
15444 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15445 if (input == NULL) {
15446 return(NULL);
15447 }
15448
15449 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15450 if (stream == NULL) {
15451 xmlFreeParserInputBuffer(input);
15452 return(NULL);
15453 }
15454
15455 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015456 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015457}
15458
15459/**
15460 * xmlCtxtReadFd:
15461 * @ctxt: an XML parser context
15462 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015463 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015464 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015465 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015466 *
15467 * parse an XML from a file descriptor and build a tree.
15468 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015469 * NOTE that the file descriptor will not be closed when the
15470 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015471 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015472 * Returns the resulting document tree
15473 */
15474xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015475xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15476 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015477{
15478 xmlParserInputBufferPtr input;
15479 xmlParserInputPtr stream;
15480
15481 if (fd < 0)
15482 return (NULL);
15483 if (ctxt == NULL)
15484 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015485 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015486
15487 xmlCtxtReset(ctxt);
15488
15489
15490 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15491 if (input == NULL)
15492 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015493 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015494 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15495 if (stream == NULL) {
15496 xmlFreeParserInputBuffer(input);
15497 return (NULL);
15498 }
15499 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015500 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015501}
15502
15503/**
15504 * xmlCtxtReadIO:
15505 * @ctxt: an XML parser context
15506 * @ioread: an I/O read function
15507 * @ioclose: an I/O close function
15508 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015509 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015510 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015511 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015512 *
15513 * parse an XML document from I/O functions and source and build a tree.
15514 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015515 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015516 * Returns the resulting document tree
15517 */
15518xmlDocPtr
15519xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15520 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015521 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015522 const char *encoding, int options)
15523{
15524 xmlParserInputBufferPtr input;
15525 xmlParserInputPtr stream;
15526
15527 if (ioread == NULL)
15528 return (NULL);
15529 if (ctxt == NULL)
15530 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015531 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015532
15533 xmlCtxtReset(ctxt);
15534
15535 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15536 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015537 if (input == NULL) {
15538 if (ioclose != NULL)
15539 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015540 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015541 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015542 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15543 if (stream == NULL) {
15544 xmlFreeParserInputBuffer(input);
15545 return (NULL);
15546 }
15547 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015548 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015549}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015550
15551#define bottom_parser
15552#include "elfgcchack.h"