blob: 5384e5565c15952d72981bfefcc787089d7b0fe6 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Stéphane Michaut454e3972017-08-28 14:30:43 +020033/* To avoid EBCDIC trouble when parsing on zOS */
34#if defined(__MVS__)
35#pragma convert("ISO8859-1")
36#endif
37
Daniel Veillard34ce8be2002-03-18 19:37:11 +000038#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000039#include "libxml.h"
40
Nick Wellnhofere3890542017-10-09 00:20:01 +020041#if defined(_WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000042#define XML_DIR_SEP '\\'
43#else
Owen Taylor3473f882001-02-23 17:55:21 +000044#define XML_DIR_SEP '/'
45#endif
46
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080048#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000050#include <stdarg.h>
Nick Wellnhofer855c19e2017-06-01 01:04:08 +020051#include <stddef.h>
Owen Taylor3473f882001-02-23 17:55:21 +000052#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000053#include <libxml/threads.h>
54#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055#include <libxml/tree.h>
56#include <libxml/parser.h>
57#include <libxml/parserInternals.h>
58#include <libxml/valid.h>
59#include <libxml/entities.h>
60#include <libxml/xmlerror.h>
61#include <libxml/encoding.h>
62#include <libxml/xmlIO.h>
63#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000064#ifdef LIBXML_CATALOG_ENABLED
65#include <libxml/catalog.h>
66#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000067#ifdef LIBXML_SCHEMAS_ENABLED
68#include <libxml/xmlschemastypes.h>
69#include <libxml/relaxng.h>
70#endif
Owen Taylor3473f882001-02-23 17:55:21 +000071#ifdef HAVE_CTYPE_H
72#include <ctype.h>
73#endif
74#ifdef HAVE_STDLIB_H
75#include <stdlib.h>
76#endif
77#ifdef HAVE_SYS_STAT_H
78#include <sys/stat.h>
79#endif
80#ifdef HAVE_FCNTL_H
81#include <fcntl.h>
82#endif
83#ifdef HAVE_UNISTD_H
84#include <unistd.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Elliott Hughese54f00d2021-05-13 08:13:46 -070090struct _xmlStartTag {
91 const xmlChar *prefix;
92 const xmlChar *URI;
93 int line;
94 int nsNr;
95};
96
Daniel Veillard0161e632008-08-28 15:36:32 +000097static void
98xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
99
Rob Richards9c0aa472009-03-26 18:10:19 +0000100static xmlParserCtxtPtr
101xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
102 const xmlChar *base, xmlParserCtxtPtr pctx);
103
Daniel Veillard28cd9cb2015-11-20 14:55:30 +0800104static void xmlHaltParser(xmlParserCtxtPtr ctxt);
105
Haibo Huangcfd91dc2020-07-30 23:01:33 -0700106static int
107xmlParseElementStart(xmlParserCtxtPtr ctxt);
108
109static void
110xmlParseElementEnd(xmlParserCtxtPtr ctxt);
111
/************************************************************************
 *                                                                      *
 *      Arbitrary limits set in the parser. See XML_PARSE_HUGE          *
 *                                                                      *
 ************************************************************************/

/*
 * XML_PARSER_BIG_ENTITY: replacement sizes below this value skip the
 * size based check done in xmlParserEntityCheck().
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
128
129/*
130 * xmlParserEntityCheck
131 *
132 * Function to check non-linear entity expansion behaviour
133 * This is here to detect and stop exponential linear entity expansion
134 * This is not a limitation of the parser but a safety
135 * boundary feature. It can be disabled with the XML_PARSE_HUGE
136 * parser option.
137 */
138static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800139xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800140 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000141{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800142 size_t consumed = 0;
Elliott Hughese54f00d2021-05-13 08:13:46 -0700143 int i;
Daniel Veillard0161e632008-08-28 15:36:32 +0000144
145 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
146 return (0);
147 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
148 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800149
150 /*
151 * This may look absurd but is needed to detect
152 * entities problems
153 */
154 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillardbdd66182016-05-23 12:27:58 +0800155 (ent->content != NULL) && (ent->checked == 0) &&
156 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -0700157 unsigned long oldnbent = ctxt->nbentities, diff;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800158 xmlChar *rep;
159
160 ent->checked = 1;
161
Peter Simons8f30bdf2016-04-15 11:56:55 +0200162 ++ctxt->depth;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800163 rep = xmlStringDecodeEntities(ctxt, ent->content,
164 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +0200165 --ctxt->depth;
Nick Wellnhofer707ad082018-01-23 16:37:54 +0100166 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbdd66182016-05-23 12:27:58 +0800167 ent->content[0] = 0;
168 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800169
Haibo Huangcfd91dc2020-07-30 23:01:33 -0700170 diff = ctxt->nbentities - oldnbent + 1;
171 if (diff > INT_MAX / 2)
172 diff = INT_MAX / 2;
173 ent->checked = diff * 2;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800174 if (rep != NULL) {
175 if (xmlStrchr(rep, '<'))
176 ent->checked |= 1;
177 xmlFree(rep);
178 rep = NULL;
179 }
180 }
Elliott Hughese54f00d2021-05-13 08:13:46 -0700181
182 /*
183 * Prevent entity exponential check, not just replacement while
184 * parsing the DTD
185 * The check is potentially costly so do that only once in a thousand
186 */
187 if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
188 (ctxt->nbentities % 1024 == 0)) {
189 for (i = 0;i < ctxt->inputNr;i++) {
190 consumed += ctxt->inputTab[i]->consumed +
191 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
192 }
193 if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
194 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
195 ctxt->instate = XML_PARSER_EOF;
196 return (1);
197 }
198 consumed = 0;
199 }
200
201
202
Daniel Veillard23f05e02013-02-19 10:21:49 +0800203 if (replacement != 0) {
204 if (replacement < XML_MAX_TEXT_LENGTH)
205 return(0);
206
207 /*
208 * If the volume of entity copy reaches 10 times the
209 * amount of parsed data and over the large text threshold
210 * then that's very likely to be an abuse.
211 */
212 if (ctxt->input != NULL) {
213 consumed = ctxt->input->consumed +
214 (ctxt->input->cur - ctxt->input->base);
215 }
216 consumed += ctxt->sizeentities;
217
218 if (replacement < XML_PARSER_NON_LINEAR * consumed)
219 return(0);
220 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000221 /*
222 * Do the check based on the replacement size of the entity
223 */
224 if (size < XML_PARSER_BIG_ENTITY)
225 return(0);
226
227 /*
228 * A limit on the amount of text data reasonably used
229 */
230 if (ctxt->input != NULL) {
231 consumed = ctxt->input->consumed +
232 (ctxt->input->cur - ctxt->input->base);
233 }
234 consumed += ctxt->sizeentities;
235
236 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
237 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
238 return (0);
239 } else if (ent != NULL) {
240 /*
241 * use the number of parsed entities in the replacement
242 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800243 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000244
245 /*
246 * The amount of data parsed counting entities size only once
247 */
248 if (ctxt->input != NULL) {
249 consumed = ctxt->input->consumed +
250 (ctxt->input->cur - ctxt->input->base);
251 }
252 consumed += ctxt->sizeentities;
253
254 /*
255 * Check the density of entities for the amount of data
256 * knowing an entity reference will take at least 3 bytes
257 */
258 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
259 return (0);
260 } else {
261 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800262 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000263 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800264 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
265 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
266 (ctxt->nbentities <= 10000))
267 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000268 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000269 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
270 return (1);
271}
272
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000273/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000274 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000275 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000276 * arbitrary depth limit for the XML documents that we allow to
277 * process. This is not a limitation of the parser but a safety
278 * boundary feature. It can be disabled with the XML_PARSE_HUGE
279 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000280 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000281unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000282
Daniel Veillard0fb18932003-09-07 09:14:37 +0000283
Daniel Veillard0161e632008-08-28 15:36:32 +0000284
/*
 * Miscellaneous parser constants; their uses are outside this chunk.
 */
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
289
/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
300
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is NULL terminated and never modified, so both the strings
 * and the array slots are const.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
310
Daniel Veillarda07050d2003-10-19 14:46:32 +0000311
Owen Taylor3473f882001-02-23 17:55:21 +0000312/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200313static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
314 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000315
Daniel Veillard7d515752003-09-26 19:12:37 +0000316static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000317xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
318 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000319 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000320 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000321
Daniel Veillard37334572008-07-31 08:20:02 +0000322static int
323xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
324 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000325#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000326static void
327xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
328 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000329#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000330
Daniel Veillard7d515752003-09-26 19:12:37 +0000331static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000332xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
333 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000334
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000335static int
336xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
337
/************************************************************************
 *                                                                      *
 *              Some factorized error routines                          *
 *                                                                      *
 ************************************************************************/
343
344/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 * xmlErrAttributeDup:
346 * @ctxt: an XML parser context
347 * @prefix: the attribute prefix
348 * @localname: the attribute localname
349 *
350 * Handle a redefinition of attribute error
351 */
352static void
353xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
354 const xmlChar * localname)
355{
Daniel Veillard157fee02003-10-31 10:36:03 +0000356 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
357 (ctxt->instate == XML_PARSER_EOF))
358 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000359 if (ctxt != NULL)
360 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200361
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000362 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000363 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200364 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 (const char *) localname, NULL, NULL, 0, 0,
366 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000367 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000368 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200369 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370 (const char *) prefix, (const char *) localname,
371 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
372 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000373 if (ctxt != NULL) {
374 ctxt->wellFormed = 0;
375 if (ctxt->recovery == 0)
376 ctxt->disableSAX = 1;
377 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378}
379
380/**
381 * xmlFatalErr:
382 * @ctxt: an XML parser context
383 * @error: the error number
384 * @extra: extra information string
385 *
386 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
387 */
388static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000389xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390{
391 const char *errmsg;
392
Daniel Veillard157fee02003-10-31 10:36:03 +0000393 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
394 (ctxt->instate == XML_PARSER_EOF))
395 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000396 switch (error) {
397 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800404 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000407 errmsg = "internal error";
408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800428 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800431 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800434 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800437 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800440 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800443 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800446 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800449 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000451 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800452 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800455 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000457 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800458 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000460 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800461 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000462 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000463 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800464 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000466 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800467 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000468 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000469 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800470 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000471 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000472 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800473 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000474 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000475 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000476 errmsg = "Fragment not allowed";
477 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000478 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800479 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000480 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000481 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800482 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000483 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000484 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800485 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000486 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000487 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800488 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000489 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000490 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800491 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000492 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000493 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800494 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000495 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000496 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800497 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000498 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000499 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000500 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800501 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000502 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000503 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800504 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000505 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000506 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800507 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000508 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000509 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800510 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000511 break;
512 case XML_ERR_CONDSEC_INVALID_KEYWORD:
513 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800514 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000515 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000516 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800517 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000518 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000519 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800520 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000521 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000522 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800523 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000525 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800526 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000527 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000528 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800529 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000530 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000531 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800532 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000533 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000534 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800535 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000536 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000537 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800538 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000539 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000540 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800541 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000542 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000543 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800544 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000545 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000546 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800547 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000548 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000549 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800550 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000551 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000552 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800553 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000554 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000555 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800556 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000557 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000558 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800559 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000560 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000561 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800562 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000563 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000564 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800565 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000566 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800567 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800568 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800569 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000570#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000571 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800572 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000573 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000574#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000575 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800576 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000577 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000578 if (ctxt != NULL)
579 ctxt->errNo = error;
David Kilzer4472c3a2016-05-13 15:13:17 +0800580 if (info == NULL) {
581 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
582 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
583 errmsg);
584 } else {
585 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
586 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
587 errmsg, info);
588 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000589 if (ctxt != NULL) {
590 ctxt->wellFormed = 0;
591 if (ctxt->recovery == 0)
592 ctxt->disableSAX = 1;
593 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000594}
595
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000596/**
597 * xmlFatalErrMsg:
598 * @ctxt: an XML parser context
599 * @error: the error number
600 * @msg: the error message
601 *
602 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
603 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800604static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000605xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
606 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000607{
Daniel Veillard157fee02003-10-31 10:36:03 +0000608 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
609 (ctxt->instate == XML_PARSER_EOF))
610 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000611 if (ctxt != NULL)
612 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000613 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200614 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000615 if (ctxt != NULL) {
616 ctxt->wellFormed = 0;
617 if (ctxt->recovery == 0)
618 ctxt->disableSAX = 1;
619 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000620}
621
622/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000623 * xmlWarningMsg:
624 * @ctxt: an XML parser context
625 * @error: the error number
626 * @msg: the error message
627 * @str1: extra data
628 * @str2: extra data
629 *
630 * Handle a warning.
631 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800632static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000633xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
634 const char *msg, const xmlChar *str1, const xmlChar *str2)
635{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000636 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000637
Daniel Veillard157fee02003-10-31 10:36:03 +0000638 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
639 (ctxt->instate == XML_PARSER_EOF))
640 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000641 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
642 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000643 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200644 if (ctxt != NULL) {
645 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000646 (ctxt->sax) ? ctxt->sax->warning : NULL,
647 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000648 ctxt, NULL, XML_FROM_PARSER, error,
649 XML_ERR_WARNING, NULL, 0,
650 (const char *) str1, (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200652 } else {
653 __xmlRaiseError(schannel, NULL, NULL,
654 ctxt, NULL, XML_FROM_PARSER, error,
655 XML_ERR_WARNING, NULL, 0,
656 (const char *) str1, (const char *) str2, NULL, 0, 0,
657 msg, (const char *) str1, (const char *) str2);
658 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000659}
660
661/**
662 * xmlValidityError:
663 * @ctxt: an XML parser context
664 * @error: the error number
665 * @msg: the error message
666 * @str1: extra data
667 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000668 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000669 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800670static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000671xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000672 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000673{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000674 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000675
676 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
677 (ctxt->instate == XML_PARSER_EOF))
678 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000679 if (ctxt != NULL) {
680 ctxt->errNo = error;
681 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
682 schannel = ctxt->sax->serror;
683 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200684 if (ctxt != NULL) {
685 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000686 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000687 ctxt, NULL, XML_FROM_DTD, error,
688 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000689 (const char *) str2, NULL, 0, 0,
690 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000691 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200692 } else {
693 __xmlRaiseError(schannel, NULL, NULL,
694 ctxt, NULL, XML_FROM_DTD, error,
695 XML_ERR_ERROR, NULL, 0, (const char *) str1,
696 (const char *) str2, NULL, 0, 0,
697 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000698 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000699}
700
701/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000702 * xmlFatalErrMsgInt:
703 * @ctxt: an XML parser context
704 * @error: the error number
705 * @msg: the error message
706 * @val: an integer value
707 *
708 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
709 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800710static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000711xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000712 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000713{
Daniel Veillard157fee02003-10-31 10:36:03 +0000714 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
715 (ctxt->instate == XML_PARSER_EOF))
716 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000717 if (ctxt != NULL)
718 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000719 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000720 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
721 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000722 if (ctxt != NULL) {
723 ctxt->wellFormed = 0;
724 if (ctxt->recovery == 0)
725 ctxt->disableSAX = 1;
726 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000727}
728
729/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000730 * xmlFatalErrMsgStrIntStr:
731 * @ctxt: an XML parser context
732 * @error: the error number
733 * @msg: the error message
734 * @str1: an string info
735 * @val: an integer value
736 * @str2: an string info
737 *
738 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
739 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800740static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000741xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800742 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000743 const xmlChar *str2)
744{
Daniel Veillard157fee02003-10-31 10:36:03 +0000745 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
746 (ctxt->instate == XML_PARSER_EOF))
747 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000748 if (ctxt != NULL)
749 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000750 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000751 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
752 NULL, 0, (const char *) str1, (const char *) str2,
753 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000754 if (ctxt != NULL) {
755 ctxt->wellFormed = 0;
756 if (ctxt->recovery == 0)
757 ctxt->disableSAX = 1;
758 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000759}
760
761/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000762 * xmlFatalErrMsgStr:
763 * @ctxt: an XML parser context
764 * @error: the error number
765 * @msg: the error message
766 * @val: a string value
767 *
768 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
769 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800770static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000771xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000772 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000773{
Daniel Veillard157fee02003-10-31 10:36:03 +0000774 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
775 (ctxt->instate == XML_PARSER_EOF))
776 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000777 if (ctxt != NULL)
778 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000779 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000780 XML_FROM_PARSER, error, XML_ERR_FATAL,
781 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
782 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000783 if (ctxt != NULL) {
784 ctxt->wellFormed = 0;
785 if (ctxt->recovery == 0)
786 ctxt->disableSAX = 1;
787 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000788}
789
790/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000791 * xmlErrMsgStr:
792 * @ctxt: an XML parser context
793 * @error: the error number
794 * @msg: the error message
795 * @val: a string value
796 *
797 * Handle a non fatal parser error
798 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800799static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000800xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
801 const char *msg, const xmlChar * val)
802{
Daniel Veillard157fee02003-10-31 10:36:03 +0000803 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
804 (ctxt->instate == XML_PARSER_EOF))
805 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000806 if (ctxt != NULL)
807 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000808 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000809 XML_FROM_PARSER, error, XML_ERR_ERROR,
810 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
811 val);
812}
813
814/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000815 * xmlNsErr:
816 * @ctxt: an XML parser context
817 * @error: the error number
818 * @msg: the message
819 * @info1: extra information string
820 * @info2: extra information string
821 *
822 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
823 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800824static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000825xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
826 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000827 const xmlChar * info1, const xmlChar * info2,
828 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000829{
Daniel Veillard157fee02003-10-31 10:36:03 +0000830 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
831 (ctxt->instate == XML_PARSER_EOF))
832 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000833 if (ctxt != NULL)
834 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000835 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000836 XML_ERR_ERROR, NULL, 0, (const char *) info1,
837 (const char *) info2, (const char *) info3, 0, 0, msg,
838 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000839 if (ctxt != NULL)
840 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000841}
842
Daniel Veillard37334572008-07-31 08:20:02 +0000843/**
844 * xmlNsWarn
845 * @ctxt: an XML parser context
846 * @error: the error number
847 * @msg: the message
848 * @info1: extra information string
849 * @info2: extra information string
850 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800851 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000852 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800853static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard37334572008-07-31 08:20:02 +0000854xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
855 const char *msg,
856 const xmlChar * info1, const xmlChar * info2,
857 const xmlChar * info3)
858{
859 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
860 (ctxt->instate == XML_PARSER_EOF))
861 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000862 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
863 XML_ERR_WARNING, NULL, 0, (const char *) info1,
864 (const char *) info2, (const char *) info3, 0, 0, msg,
865 info1, info2, info3);
866}
867
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000868/************************************************************************
869 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800870 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000871 * *
872 ************************************************************************/
873
874/**
875 * xmlHasFeature:
876 * @feature: the feature to be examined
877 *
878 * Examines if the library has been compiled with a given feature.
879 *
880 * Returns a non-zero value if the feature exist, otherwise zero.
881 * Returns zero (0) if the feature does not exist or an unknown
882 * unknown feature is requested, non-zero otherwise.
883 */
884int
885xmlHasFeature(xmlFeature feature)
886{
887 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000888 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000889#ifdef LIBXML_THREAD_ENABLED
890 return(1);
891#else
892 return(0);
893#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000894 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000895#ifdef LIBXML_TREE_ENABLED
896 return(1);
897#else
898 return(0);
899#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000900 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000901#ifdef LIBXML_OUTPUT_ENABLED
902 return(1);
903#else
904 return(0);
905#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000906 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000907#ifdef LIBXML_PUSH_ENABLED
908 return(1);
909#else
910 return(0);
911#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000912 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000913#ifdef LIBXML_READER_ENABLED
914 return(1);
915#else
916 return(0);
917#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000918 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000919#ifdef LIBXML_PATTERN_ENABLED
920 return(1);
921#else
922 return(0);
923#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000924 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000925#ifdef LIBXML_WRITER_ENABLED
926 return(1);
927#else
928 return(0);
929#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000930 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000931#ifdef LIBXML_SAX1_ENABLED
932 return(1);
933#else
934 return(0);
935#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000936 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000937#ifdef LIBXML_FTP_ENABLED
938 return(1);
939#else
940 return(0);
941#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000942 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000943#ifdef LIBXML_HTTP_ENABLED
944 return(1);
945#else
946 return(0);
947#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000948 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000949#ifdef LIBXML_VALID_ENABLED
950 return(1);
951#else
952 return(0);
953#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000954 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000955#ifdef LIBXML_HTML_ENABLED
956 return(1);
957#else
958 return(0);
959#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000960 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000961#ifdef LIBXML_LEGACY_ENABLED
962 return(1);
963#else
964 return(0);
965#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000966 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000967#ifdef LIBXML_C14N_ENABLED
968 return(1);
969#else
970 return(0);
971#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000972 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000973#ifdef LIBXML_CATALOG_ENABLED
974 return(1);
975#else
976 return(0);
977#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000978 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000979#ifdef LIBXML_XPATH_ENABLED
980 return(1);
981#else
982 return(0);
983#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000984 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000985#ifdef LIBXML_XPTR_ENABLED
986 return(1);
987#else
988 return(0);
989#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000990 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000991#ifdef LIBXML_XINCLUDE_ENABLED
992 return(1);
993#else
994 return(0);
995#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000996 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000997#ifdef LIBXML_ICONV_ENABLED
998 return(1);
999#else
1000 return(0);
1001#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001002 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001003#ifdef LIBXML_ISO8859X_ENABLED
1004 return(1);
1005#else
1006 return(0);
1007#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001008 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001009#ifdef LIBXML_UNICODE_ENABLED
1010 return(1);
1011#else
1012 return(0);
1013#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001014 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001015#ifdef LIBXML_REGEXP_ENABLED
1016 return(1);
1017#else
1018 return(0);
1019#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001020 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001021#ifdef LIBXML_AUTOMATA_ENABLED
1022 return(1);
1023#else
1024 return(0);
1025#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001026 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001027#ifdef LIBXML_EXPR_ENABLED
1028 return(1);
1029#else
1030 return(0);
1031#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001032 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001033#ifdef LIBXML_SCHEMAS_ENABLED
1034 return(1);
1035#else
1036 return(0);
1037#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001038 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001039#ifdef LIBXML_SCHEMATRON_ENABLED
1040 return(1);
1041#else
1042 return(0);
1043#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001044 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001045#ifdef LIBXML_MODULES_ENABLED
1046 return(1);
1047#else
1048 return(0);
1049#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001050 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001051#ifdef LIBXML_DEBUG_ENABLED
1052 return(1);
1053#else
1054 return(0);
1055#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001056 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001057#ifdef DEBUG_MEMORY_LOCATION
1058 return(1);
1059#else
1060 return(0);
1061#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001062 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001063#ifdef LIBXML_DEBUG_RUNTIME
1064 return(1);
1065#else
1066 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001067#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001068 case XML_WITH_ZLIB:
1069#ifdef LIBXML_ZLIB_ENABLED
1070 return(1);
1071#else
1072 return(0);
1073#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001074 case XML_WITH_LZMA:
1075#ifdef LIBXML_LZMA_ENABLED
1076 return(1);
1077#else
1078 return(0);
1079#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001080 case XML_WITH_ICU:
1081#ifdef LIBXML_ICU_ENABLED
1082 return(1);
1083#else
1084 return(0);
1085#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001086 default:
1087 break;
1088 }
1089 return(0);
1090}
1091
1092/************************************************************************
1093 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001094 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001095 * *
1096 ************************************************************************/
1097
1098/**
1099 * xmlDetectSAX2:
1100 * @ctxt: an XML parser context
1101 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001102 * Do the SAX2 detection and specific initialization
Daniel Veillarde57ec792003-09-10 10:50:59 +00001103 */
1104static void
1105xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
Haibo Huangf0a546b2020-09-01 20:28:19 -07001106 xmlSAXHandlerPtr sax;
Elliott Hughesecdab2a2022-02-23 14:33:50 -08001107
1108 /* Avoid unused variable warning if features are disabled. */
1109 (void) sax;
1110
Daniel Veillarde57ec792003-09-10 10:50:59 +00001111 if (ctxt == NULL) return;
Haibo Huangf0a546b2020-09-01 20:28:19 -07001112 sax = ctxt->sax;
Daniel Veillard81273902003-09-30 00:43:48 +00001113#ifdef LIBXML_SAX1_ENABLED
Haibo Huangf0a546b2020-09-01 20:28:19 -07001114 if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
1115 ((sax->startElementNs != NULL) ||
1116 (sax->endElementNs != NULL) ||
1117 ((sax->startElement == NULL) && (sax->endElement == NULL))))
1118 ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001119#else
1120 ctxt->sax2 = 1;
1121#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001122
1123 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1124 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1125 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001126 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1127 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001128 xmlErrMemory(ctxt, NULL);
1129 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001130}
1131
Daniel Veillarde57ec792003-09-10 10:50:59 +00001132typedef struct _xmlDefAttrs xmlDefAttrs;
1133typedef xmlDefAttrs *xmlDefAttrsPtr;
1134struct _xmlDefAttrs {
1135 int nbAttrs; /* number of defaulted attributes on that element */
1136 int maxAttrs; /* the size of the array */
Nick Wellnhofer47496722017-05-31 16:46:39 +02001137#if __STDC_VERSION__ >= 199901L
1138 /* Using a C99 flexible array member avoids UBSan errors. */
1139 const xmlChar *values[]; /* array of localname/prefix/values/external */
1140#else
1141 const xmlChar *values[5];
1142#endif
Daniel Veillarde57ec792003-09-10 10:50:59 +00001143};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001144
1145/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001146 * xmlAttrNormalizeSpace:
1147 * @src: the source string
1148 * @dst: the target string
1149 *
1150 * Normalize the space in non CDATA attribute values:
1151 * If the attribute type is not CDATA, then the XML processor MUST further
1152 * process the normalized attribute value by discarding any leading and
1153 * trailing space (#x20) characters, and by replacing sequences of space
1154 * (#x20) characters by a single space (#x20) character.
1155 * Note that the size of dst need to be at least src, and if one doesn't need
1156 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1157 * passing src as dst is just fine.
1158 *
1159 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1160 * is needed.
1161 */
1162static xmlChar *
1163xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1164{
1165 if ((src == NULL) || (dst == NULL))
1166 return(NULL);
1167
1168 while (*src == 0x20) src++;
1169 while (*src != 0) {
1170 if (*src == 0x20) {
1171 while (*src == 0x20) src++;
1172 if (*src != 0)
1173 *dst++ = 0x20;
1174 } else {
1175 *dst++ = *src++;
1176 }
1177 }
1178 *dst = 0;
1179 if (dst == src)
1180 return(NULL);
1181 return(dst);
1182}
1183
1184/**
1185 * xmlAttrNormalizeSpace2:
1186 * @src: the source string
1187 *
1188 * Normalize the space in non CDATA attribute values, a slightly more complex
1189 * front end to avoid allocation problems when running on attribute values
1190 * coming from the input.
1191 *
1192 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1193 * is needed.
1194 */
1195static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001196xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001197{
1198 int i;
1199 int remove_head = 0;
1200 int need_realloc = 0;
1201 const xmlChar *cur;
1202
1203 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1204 return(NULL);
1205 i = *len;
1206 if (i <= 0)
1207 return(NULL);
1208
1209 cur = src;
1210 while (*cur == 0x20) {
1211 cur++;
1212 remove_head++;
1213 }
1214 while (*cur != 0) {
1215 if (*cur == 0x20) {
1216 cur++;
1217 if ((*cur == 0x20) || (*cur == 0)) {
1218 need_realloc = 1;
1219 break;
1220 }
1221 } else
1222 cur++;
1223 }
1224 if (need_realloc) {
1225 xmlChar *ret;
1226
1227 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1228 if (ret == NULL) {
1229 xmlErrMemory(ctxt, NULL);
1230 return(NULL);
1231 }
1232 xmlAttrNormalizeSpace(ret, ret);
1233 *len = (int) strlen((const char *)ret);
1234 return(ret);
1235 } else if (remove_head) {
1236 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001237 memmove(src, src + remove_head, 1 + *len);
1238 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001239 }
1240 return(NULL);
1241}
1242
1243/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001244 * xmlAddDefAttrs:
1245 * @ctxt: an XML parser context
1246 * @fullname: the element fullname
1247 * @fullattr: the attribute fullname
1248 * @value: the attribute value
1249 *
1250 * Add a defaulted attribute for an element
1251 */
1252static void
1253xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1254 const xmlChar *fullname,
1255 const xmlChar *fullattr,
1256 const xmlChar *value) {
1257 xmlDefAttrsPtr defaults;
1258 int len;
1259 const xmlChar *name;
1260 const xmlChar *prefix;
1261
Daniel Veillard6a31b832008-03-26 14:06:44 +00001262 /*
1263 * Allows to detect attribute redefinitions
1264 */
1265 if (ctxt->attsSpecial != NULL) {
1266 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1267 return;
1268 }
1269
Daniel Veillarde57ec792003-09-10 10:50:59 +00001270 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001271 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001272 if (ctxt->attsDefault == NULL)
1273 goto mem_error;
1274 }
1275
1276 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001277 * split the element name into prefix:localname , the string found
1278 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001279 */
1280 name = xmlSplitQName3(fullname, &len);
1281 if (name == NULL) {
1282 name = xmlDictLookup(ctxt->dict, fullname, -1);
1283 prefix = NULL;
1284 } else {
1285 name = xmlDictLookup(ctxt->dict, name, -1);
1286 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1287 }
1288
1289 /*
1290 * make sure there is some storage
1291 */
1292 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1293 if (defaults == NULL) {
1294 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001295 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001296 if (defaults == NULL)
1297 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001298 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001299 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001300 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1301 defaults, NULL) < 0) {
1302 xmlFree(defaults);
1303 goto mem_error;
1304 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001305 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001306 xmlDefAttrsPtr temp;
1307
1308 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001309 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001310 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001311 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001312 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001313 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001314 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1315 defaults, NULL) < 0) {
1316 xmlFree(defaults);
1317 goto mem_error;
1318 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001319 }
1320
1321 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001322 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001323 * are within the DTD and hen not associated to namespace names.
1324 */
1325 name = xmlSplitQName3(fullattr, &len);
1326 if (name == NULL) {
1327 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1328 prefix = NULL;
1329 } else {
1330 name = xmlDictLookup(ctxt->dict, name, -1);
1331 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1332 }
1333
Daniel Veillardae0765b2008-07-31 19:54:59 +00001334 defaults->values[5 * defaults->nbAttrs] = name;
1335 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001336 /* intern the string and precompute the end */
1337 len = xmlStrlen(value);
1338 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001339 defaults->values[5 * defaults->nbAttrs + 2] = value;
1340 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1341 if (ctxt->external)
1342 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1343 else
1344 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001345 defaults->nbAttrs++;
1346
1347 return;
1348
1349mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001350 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001351 return;
1352}
1353
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001354/**
1355 * xmlAddSpecialAttr:
1356 * @ctxt: an XML parser context
1357 * @fullname: the element fullname
1358 * @fullattr: the attribute fullname
1359 * @type: the attribute type
1360 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001361 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001362 */
1363static void
1364xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1365 const xmlChar *fullname,
1366 const xmlChar *fullattr,
1367 int type)
1368{
1369 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001370 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001371 if (ctxt->attsSpecial == NULL)
1372 goto mem_error;
1373 }
1374
Daniel Veillardac4118d2008-01-11 05:27:32 +00001375 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1376 return;
1377
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001378 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
Nick Wellnhoferd422b952017-10-09 13:37:42 +02001379 (void *) (ptrdiff_t) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001380 return;
1381
1382mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001383 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001384 return;
1385}
1386
Daniel Veillard4432df22003-09-28 18:58:27 +00001387/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001388 * xmlCleanSpecialAttrCallback:
1389 *
1390 * Removes CDATA attributes from the special attribute table
1391 */
1392static void
1393xmlCleanSpecialAttrCallback(void *payload, void *data,
1394 const xmlChar *fullname, const xmlChar *fullattr,
1395 const xmlChar *unused ATTRIBUTE_UNUSED) {
1396 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1397
Nick Wellnhoferd422b952017-10-09 13:37:42 +02001398 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001399 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1400 }
1401}
1402
1403/**
1404 * xmlCleanSpecialAttr:
1405 * @ctxt: an XML parser context
1406 *
1407 * Trim the list of attributes defined to remove all those of type
1408 * CDATA as they are not special. This call should be done when finishing
1409 * to parse the DTD and before starting to parse the document root.
1410 */
1411static void
1412xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1413{
1414 if (ctxt->attsSpecial == NULL)
1415 return;
1416
1417 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1418
1419 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1420 xmlHashFree(ctxt->attsSpecial, NULL);
1421 ctxt->attsSpecial = NULL;
1422 }
1423 return;
1424}
1425
/*
 * Progress markers used by xmlCheckLanguageID: the kind of the last
 * subtag accepted so far. Subtags must appear in this order, so a subtag
 * kind is only accepted while the current stage is lower than its own.
 */
enum {
    XML_LANG_AFTER_LANGUAGE = 0,
    XML_LANG_AFTER_EXTLANG,
    XML_LANG_AFTER_SCRIPT,
    XML_LANG_AFTER_REGION,
    XML_LANG_AFTER_VARIANT
};

/**
 * xmlLangSkipAlpha:
 * @cur:  pointer into a zero terminated string
 *
 * Returns a pointer to the first character at or after @cur which is
 * not an ASCII letter ([a-z] | [A-Z]).
 */
static const xmlChar *
xmlLangSkipAlpha(const xmlChar *cur)
{
    while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
           ((cur[0] >= 'a') && (cur[0] <= 'z')))
        cur++;
    return(cur);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC reference the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway
 *
 * What this implementation accepts:
 *  - "i-" / "x-" (either case) followed by one or more ASCII letters and
 *    nothing else; an empty tail such as "x-" is rejected because
 *    productions [36] and [37] require at least one letter;
 *  - a lone run of 4 to 8 letters;
 *  - a 2 or 3 letter language, optionally followed, in this order, by at
 *    most one 3 letter extlang, one 4 letter script, one region (2 letters
 *    or 3 digits) and one variant of 5 to 8 letters. Anything after
 *    "variant-" is not checked.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang, *nxt;
    ptrdiff_t len;
    int stage = XML_LANG_AFTER_LANGUAGE;

    if (cur == NULL)
        return (0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur += 2;
        nxt = xmlLangSkipAlpha(cur);
        /* at least one letter is mandatory after the dash */
        return((nxt > cur) && (nxt[0] == 0));
    }

    nxt = xmlLangSkipAlpha(cur);
    len = nxt - cur;
    if (len >= 4) {
        /*
         * Reserved (4 letters) or registered (5 to 8 letters) language:
         * accepted only when it is the whole value.
         */
        return((len <= 8) && (nxt[0] == 0));
    }
    if (len < 2)
        return(0);

    /* we got an ISO 639 code, now walk the optional subtags */
    for (;;) {
        /* nxt sits right after the subtag that was just accepted */
        if (nxt[0] == 0)
            return(1);
        if (nxt[0] != '-')
            return(0);
        /* extensions and private use subtags not checked */
        if (stage == XML_LANG_AFTER_VARIANT)
            return(1);

        nxt++;
        cur = nxt;

        /* UN M.49 region: exactly three digits, not allowed after a region */
        if ((stage < XML_LANG_AFTER_REGION) &&
            (nxt[0] >= '0') && (nxt[0] <= '9')) {
            if ((nxt[1] < '0') || (nxt[1] > '9') ||
                (nxt[2] < '0') || (nxt[2] > '9'))
                return(0);
            nxt += 3;
            stage = XML_LANG_AFTER_REGION;
            continue;
        }

        /* alphabetic subtag: its length tells which kind it is */
        nxt = xmlLangSkipAlpha(cur);
        len = nxt - cur;
        if ((len >= 5) && (len <= 8))
            stage = XML_LANG_AFTER_VARIANT;
        else if ((len == 2) && (stage < XML_LANG_AFTER_REGION))
            stage = XML_LANG_AFTER_REGION;
        else if ((len == 4) && (stage < XML_LANG_AFTER_SCRIPT))
            stage = XML_LANG_AFTER_SCRIPT;
        else if ((len == 3) && (stage < XML_LANG_AFTER_EXTLANG))
            stage = XML_LANG_AFTER_EXTLANG;
        else
            return(0);
    }
}
1619
Owen Taylor3473f882001-02-23 17:55:21 +00001620/************************************************************************
1621 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001622 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001623 * *
1624 ************************************************************************/
1625
Daniel Veillard8ed10722009-08-20 19:17:36 +02001626static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1627 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001628
Daniel Veillard0fb18932003-09-07 09:14:37 +00001629#ifdef SAX2
1630/**
1631 * nsPush:
1632 * @ctxt: an XML parser context
1633 * @prefix: the namespace prefix or NULL
1634 * @URL: the namespace name
1635 *
1636 * Pushes a new parser namespace on top of the ns stack
1637 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001638 * Returns -1 in case of error, -2 if the namespace should be discarded
1639 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001640 */
1641static int
1642nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1643{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001644 if (ctxt->options & XML_PARSE_NSCLEAN) {
1645 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001646 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001647 if (ctxt->nsTab[i] == prefix) {
1648 /* in scope */
1649 if (ctxt->nsTab[i + 1] == URL)
1650 return(-2);
1651 /* out of scope keep it */
1652 break;
1653 }
1654 }
1655 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001656 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1657 ctxt->nsMax = 10;
1658 ctxt->nsNr = 0;
1659 ctxt->nsTab = (const xmlChar **)
1660 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1661 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001662 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001663 ctxt->nsMax = 0;
1664 return (-1);
1665 }
1666 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001667 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001668 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001669 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1670 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1671 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001672 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001673 ctxt->nsMax /= 2;
1674 return (-1);
1675 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001676 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001677 }
1678 ctxt->nsTab[ctxt->nsNr++] = prefix;
1679 ctxt->nsTab[ctxt->nsNr++] = URL;
1680 return (ctxt->nsNr);
1681}
1682/**
1683 * nsPop:
1684 * @ctxt: an XML parser context
1685 * @nr: the number to pop
1686 *
1687 * Pops the top @nr parser prefix/namespace from the ns stack
1688 *
1689 * Returns the number of namespaces removed
1690 */
1691static int
1692nsPop(xmlParserCtxtPtr ctxt, int nr)
1693{
1694 int i;
1695
1696 if (ctxt->nsTab == NULL) return(0);
1697 if (ctxt->nsNr < nr) {
1698 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1699 nr = ctxt->nsNr;
1700 }
1701 if (ctxt->nsNr <= 0)
1702 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001703
Daniel Veillard0fb18932003-09-07 09:14:37 +00001704 for (i = 0;i < nr;i++) {
1705 ctxt->nsNr--;
1706 ctxt->nsTab[ctxt->nsNr] = NULL;
1707 }
1708 return(nr);
1709}
1710#endif
1711
1712static int
1713xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1714 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001715 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001716 int maxatts;
1717
1718 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001719 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001720 atts = (const xmlChar **)
1721 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001722 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001723 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001724 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1725 if (attallocs == NULL) goto mem_error;
1726 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001727 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001728 } else if (nr + 5 > ctxt->maxatts) {
1729 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001730 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1731 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001732 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001733 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001734 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1735 (maxatts / 5) * sizeof(int));
1736 if (attallocs == NULL) goto mem_error;
1737 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001738 ctxt->maxatts = maxatts;
1739 }
1740 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001741mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001742 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001743 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001744}
1745
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001746/**
1747 * inputPush:
1748 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001749 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001750 *
1751 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001752 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001753 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001754 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001755int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001756inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1757{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001758 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001759 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001760 if (ctxt->inputNr >= ctxt->inputMax) {
1761 ctxt->inputMax *= 2;
1762 ctxt->inputTab =
1763 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1764 ctxt->inputMax *
1765 sizeof(ctxt->inputTab[0]));
1766 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001767 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001768 xmlFreeInputStream(value);
1769 ctxt->inputMax /= 2;
1770 value = NULL;
1771 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001772 }
1773 }
1774 ctxt->inputTab[ctxt->inputNr] = value;
1775 ctxt->input = value;
1776 return (ctxt->inputNr++);
1777}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001778/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001779 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001780 * @ctxt: an XML parser context
1781 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001782 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001783 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001784 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001785 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001786xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001787inputPop(xmlParserCtxtPtr ctxt)
1788{
1789 xmlParserInputPtr ret;
1790
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001791 if (ctxt == NULL)
1792 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001793 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001794 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001795 ctxt->inputNr--;
1796 if (ctxt->inputNr > 0)
1797 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1798 else
1799 ctxt->input = NULL;
1800 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001801 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001802 return (ret);
1803}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001804/**
1805 * nodePush:
1806 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001807 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001808 *
1809 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001810 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001811 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001812 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001813int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001814nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1815{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001816 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001817 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001818 xmlNodePtr *tmp;
1819
1820 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1821 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001822 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001823 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001824 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001825 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001826 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001827 ctxt->nodeTab = tmp;
1828 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001829 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001830 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1831 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001832 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001833 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001834 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001835 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001836 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001837 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001838 ctxt->nodeTab[ctxt->nodeNr] = value;
1839 ctxt->node = value;
1840 return (ctxt->nodeNr++);
1841}
Daniel Veillard8915c152008-08-26 13:05:34 +00001842
Daniel Veillard1c732d22002-11-30 11:22:59 +00001843/**
1844 * nodePop:
1845 * @ctxt: an XML parser context
1846 *
1847 * Pops the top element node from the node stack
1848 *
1849 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001850 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001851xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001852nodePop(xmlParserCtxtPtr ctxt)
1853{
1854 xmlNodePtr ret;
1855
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001856 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001857 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001858 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001859 ctxt->nodeNr--;
1860 if (ctxt->nodeNr > 0)
1861 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1862 else
1863 ctxt->node = NULL;
1864 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001865 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001866 return (ret);
1867}
Daniel Veillarda2351322004-06-27 12:08:10 +00001868
Daniel Veillard1c732d22002-11-30 11:22:59 +00001869/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001870 * nameNsPush:
1871 * @ctxt: an XML parser context
1872 * @value: the element name
1873 * @prefix: the element prefix
1874 * @URI: the element namespace name
Elliott Hughese54f00d2021-05-13 08:13:46 -07001875 * @line: the current line number for error messages
1876 * @nsNr: the number of namespaces pushed on the namespace table
Daniel Veillarde57ec792003-09-10 10:50:59 +00001877 *
1878 * Pushes a new element name/prefix/URL on top of the name stack
1879 *
1880 * Returns -1 in case of error, the index in the stack otherwise
1881 */
1882static int
1883nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
Elliott Hughese54f00d2021-05-13 08:13:46 -07001884 const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001885{
Elliott Hughese54f00d2021-05-13 08:13:46 -07001886 xmlStartTag *tag;
1887
Daniel Veillarde57ec792003-09-10 10:50:59 +00001888 if (ctxt->nameNr >= ctxt->nameMax) {
1889 const xmlChar * *tmp;
Elliott Hughese54f00d2021-05-13 08:13:46 -07001890 xmlStartTag *tmp2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001891 ctxt->nameMax *= 2;
1892 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1893 ctxt->nameMax *
1894 sizeof(ctxt->nameTab[0]));
1895 if (tmp == NULL) {
1896 ctxt->nameMax /= 2;
1897 goto mem_error;
1898 }
1899 ctxt->nameTab = tmp;
Elliott Hughese54f00d2021-05-13 08:13:46 -07001900 tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1901 ctxt->nameMax *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001902 sizeof(ctxt->pushTab[0]));
1903 if (tmp2 == NULL) {
1904 ctxt->nameMax /= 2;
1905 goto mem_error;
1906 }
1907 ctxt->pushTab = tmp2;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001908 } else if (ctxt->pushTab == NULL) {
Elliott Hughese54f00d2021-05-13 08:13:46 -07001909 ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001910 sizeof(ctxt->pushTab[0]));
1911 if (ctxt->pushTab == NULL)
1912 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001913 }
1914 ctxt->nameTab[ctxt->nameNr] = value;
1915 ctxt->name = value;
Elliott Hughese54f00d2021-05-13 08:13:46 -07001916 tag = &ctxt->pushTab[ctxt->nameNr];
1917 tag->prefix = prefix;
1918 tag->URI = URI;
1919 tag->line = line;
1920 tag->nsNr = nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001921 return (ctxt->nameNr++);
1922mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001923 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001924 return (-1);
1925}
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Pops the top element/prefix/URI name from the name stack. ctxt->name
 * is set to the entry below it, or to NULL when the stack becomes empty,
 * and the freed ctxt->nameTab slot is reset to NULL.
 *
 * Returns the name just removed, NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;
    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001952
1953/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001954 * namePush:
1955 * @ctxt: an XML parser context
1956 * @value: the element name
1957 *
1958 * Pushes a new element name on top of the name stack
1959 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001960 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001961 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001962int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001963namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001964{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001965 if (ctxt == NULL) return (-1);
1966
Daniel Veillard1c732d22002-11-30 11:22:59 +00001967 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001968 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001969 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001970 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001971 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001972 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001973 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001974 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001975 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001976 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001977 }
1978 ctxt->nameTab[ctxt->nameNr] = value;
1979 ctxt->name = value;
1980 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001981mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001982 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001983 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001984}
1985/**
1986 * namePop:
1987 * @ctxt: an XML parser context
1988 *
1989 * Pops the top element name from the name stack
1990 *
1991 * Returns the name just removed
1992 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001993const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001994namePop(xmlParserCtxtPtr ctxt)
1995{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001996 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001997
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001998 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1999 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00002000 ctxt->nameNr--;
2001 if (ctxt->nameNr > 0)
2002 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2003 else
2004 ctxt->name = NULL;
2005 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00002006 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00002007 return (ret);
2008}
Owen Taylor3473f882001-02-23 17:55:21 +00002009
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002010static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00002011 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00002012 int *tmp;
2013
Owen Taylor3473f882001-02-23 17:55:21 +00002014 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00002015 tmp = (int *) xmlRealloc(ctxt->spaceTab,
2016 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
2017 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002018 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002019 ctxt->spaceMax /=2;
2020 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002021 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00002022 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002023 }
2024 ctxt->spaceTab[ctxt->spaceNr] = val;
2025 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
2026 return(ctxt->spaceNr++);
2027}
2028
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002029static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00002030 int ret;
2031 if (ctxt->spaceNr <= 0) return(0);
2032 ctxt->spaceNr--;
2033 if (ctxt->spaceNr > 0)
2034 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
2035 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00002036 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00002037 ret = ctxt->spaceTab[ctxt->spaceNr];
2038 ctxt->spaceTab[ctxt->spaceNr] = -1;
2039 return(ret);
2040}
2041
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 *                    use them
 *
 *   CUR_PTR returns the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2076
/*
 * Raw accessors on the current input. All of them expand to expressions
 * on a variable named ctxt which must be in scope at the point of use.
 */
/* the xmlChar at the current position; RAW and CUR expand identically */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* the xmlChar found val positions after the current one */
#define NXT(val) ctxt->input->cur[(val)]
/* the current position pointer and the base pointer of the input */
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base
Owen Taylor3473f882001-02-23 17:55:21 +00002082
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, are equal to c1 ... cn in that order. Each macro builds
 * on the previous one, and the comparison stops at the first mismatch.
 * Every parameter is parenthesised in the expansion so that any
 * expression can be passed safely; s is evaluated once per byte compared.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2100
/*
 * SKIP(val): advance the current input position and its column counter
 * by val, without looking at the skipped bytes (the line counter is not
 * touched). If the new current byte is 0 more input is requested.
 * val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2106
/*
 * SKIPL(val): advance the current input position by val bytes, one at a
 * time, keeping the line and column counters in sync: a '\n' increments
 * the line and resets the column to 1, any other byte increments the
 * column. If the new current byte is 0 more input is requested.
 * val is parenthesised in the loop condition so that any expression can
 * be passed; it is evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2118
Daniel Veillarda880b122003-04-21 21:36:41 +00002119#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002120 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2121 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002122 xmlSHRINK (ctxt);
2123
2124static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2125 xmlParserInputShrink(ctxt->input);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002126 if (*ctxt->input->cur == 0)
2127 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2128}
Owen Taylor3473f882001-02-23 17:55:21 +00002129
/*
 * GROW: when not in progressive mode and fewer than INPUT_CHUNK bytes remain
 * between cur and end, call xmlGROW().
 * NOTE(review): this expands to a bare `if` that carries its own semicolon,
 * so it is not safe as the unbraced body of an if/else.
 */
#define GROW								\
    if ((ctxt->progressive == 0) &&					\
	(ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))		\
	xmlGROW (ctxt);
2133
2134static void xmlGROW (xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002135 ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2136 ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
Longstreth Jon190a0b82014-02-06 10:58:17 +01002137
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002138 if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2139 (curBase > XML_MAX_LOOKUP_LIMIT)) &&
Vlad Tsyrklevich28f52fe2017-08-10 15:08:48 -07002140 ((ctxt->input->buf) &&
2141 (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002142 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2143 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002144 xmlHaltParser(ctxt);
2145 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002146 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002147 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002148 if ((ctxt->input->cur > ctxt->input->end) ||
2149 (ctxt->input->cur < ctxt->input->base)) {
2150 xmlHaltParser(ctxt);
2151 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2152 return;
2153 }
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002154 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2155 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillarda880b122003-04-21 21:36:41 +00002156}
Owen Taylor3473f882001-02-23 17:55:21 +00002157
/* SKIP_BLANKS: call xmlSkipBlankChars() on the context named ctxt. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: call xmlNextChar() on the context named ctxt. */
#define NEXT xmlNextChar(ctxt)
2161
/*
 * NEXT1: step over exactly one byte, bumping the column counter, and call
 * xmlParserInputGrow() if the byte now under the cursor is zero. No newline
 * handling is done here.
 * NOTE(review): expands to a plain brace block rather than do/while(0), so
 * `NEXT1;` is not safe as the unbraced body of an if/else.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
2168
/*
 * NEXTL(l): update line/column for the character under the cursor (a '\n'
 * starts a new line at column 1, anything else adds one column), then move
 * the cursor forward by l bytes. No grow is attempted here.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++;						\
	ctxt->input->col = 1;						\
    } else {								\
	ctxt->input->col++;						\
    }									\
    ctxt->input->cur += (l);						\
  } while (0)
2175
/* CUR_CHAR(l): xmlCurrentChar() on ctxt; the address of l is passed along. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* CUR_SCHAR(s, l): xmlStringCurrentChar() on ctxt for string s; &l is passed. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2178
/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i.
 * When l is 1 the value is stored as a single xmlChar and i is incremented;
 * otherwise xmlCopyCharMultiByte() writes it and i is advanced by the value
 * that call returns.
 *
 * Every argument is parenthesized so that expression arguments keep their
 * intended grouping. The macro is still an unbraced if/else without a
 * trailing semicolon: use it as `COPY_BUF(...);` inside a braced block.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002182
2183/**
2184 * xmlSkipBlankChars:
2185 * @ctxt: the XML parser context
2186 *
2187 * skip all blanks character found at that point in the input streams.
2188 * It pops up finished entities in the process if allowable at that point.
2189 *
2190 * Returns the number of space chars skipped
2191 */
2192
2193int
2194xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002195 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002196
2197 /*
2198 * It's Okay to use CUR/NEXT here since all the blanks are on
2199 * the ASCII range.
2200 */
Elliott Hughesecdab2a2022-02-23 14:33:50 -08002201 if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2202 (ctxt->instate == XML_PARSER_START)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002203 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002204 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002205 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002206 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002207 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002208 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002209 if (*cur == '\n') {
2210 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002211 } else {
2212 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002213 }
2214 cur++;
2215 res++;
2216 if (*cur == 0) {
2217 ctxt->input->cur = cur;
2218 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2219 cur = ctxt->input->cur;
2220 }
2221 }
2222 ctxt->input->cur = cur;
2223 } else {
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002224 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2225
2226 while (1) {
2227 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002228 NEXT;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002229 } else if (CUR == '%') {
2230 /*
2231 * Need to handle support of entities branching here
2232 */
2233 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2234 break;
2235 xmlParsePEReference(ctxt);
2236 } else if (CUR == 0) {
2237 if (ctxt->inputNr <= 1)
2238 break;
2239 xmlPopInput(ctxt);
2240 } else {
2241 break;
2242 }
Nick Wellnhofer872fea92017-06-19 00:24:12 +02002243
2244 /*
2245 * Also increase the counter when entering or exiting a PERef.
2246 * The spec says: "When a parameter-entity reference is recognized
2247 * in the DTD and included, its replacement text MUST be enlarged
2248 * by the attachment of one leading and one following space (#x20)
2249 * character."
2250 */
2251 res++;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002252 }
Daniel Veillard02141ea2001-04-30 11:46:40 +00002253 }
Owen Taylor3473f882001-02-23 17:55:21 +00002254 return(res);
2255}
2256
2257/************************************************************************
2258 * *
2259 * Commodity functions to handle entities *
2260 * *
2261 ************************************************************************/
2262
2263/**
2264 * xmlPopInput:
2265 * @ctxt: an XML parser context
2266 *
2267 * xmlPopInput: the current input pointed by ctxt->input came to an end
2268 * pop it and return the next char.
2269 *
2270 * Returns the current xmlChar in the parser context
2271 */
2272xmlChar
2273xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002274 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002275 if (xmlParserDebugEntities)
2276 xmlGenericError(xmlGenericErrorContext,
2277 "Popping input %d\n", ctxt->inputNr);
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02002278 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2279 (ctxt->instate != XML_PARSER_EOF))
2280 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2281 "Unfinished entity outside the DTD");
Owen Taylor3473f882001-02-23 17:55:21 +00002282 xmlFreeInputStream(inputPop(ctxt));
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002283 if (*ctxt->input->cur == 0)
2284 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00002285 return(CUR);
2286}
2287
2288/**
2289 * xmlPushInput:
2290 * @ctxt: an XML parser context
2291 * @input: an XML parser input fragment (entity, XML fragment ...).
2292 *
2293 * xmlPushInput: switch to a new input stream which is stacked on top
2294 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002295 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002296 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002297int
Owen Taylor3473f882001-02-23 17:55:21 +00002298xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002299 int ret;
2300 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002301
2302 if (xmlParserDebugEntities) {
2303 if ((ctxt->input != NULL) && (ctxt->input->filename))
2304 xmlGenericError(xmlGenericErrorContext,
2305 "%s(%d): ", ctxt->input->filename,
2306 ctxt->input->line);
2307 xmlGenericError(xmlGenericErrorContext,
2308 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2309 }
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02002310 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2311 (ctxt->inputNr > 1024)) {
2312 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2313 while (ctxt->inputNr > 1)
2314 xmlFreeInputStream(inputPop(ctxt));
2315 return(-1);
2316 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002317 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002318 if (ctxt->instate == XML_PARSER_EOF)
2319 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002320 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002321 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002322}
2323
2324/**
2325 * xmlParseCharRef:
2326 * @ctxt: an XML parser context
2327 *
2328 * parse Reference declarations
2329 *
2330 * [66] CharRef ::= '&#' [0-9]+ ';' |
2331 * '&#x' [0-9a-fA-F]+ ';'
2332 *
2333 * [ WFC: Legal Character ]
2334 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002335 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002336 *
2337 * Returns the value parsed (as an int), 0 in case of error
2338 */
2339int
2340xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002341 int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002342 int count = 0;
2343
Owen Taylor3473f882001-02-23 17:55:21 +00002344 /*
2345 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2346 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002347 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002348 (NXT(2) == 'x')) {
2349 SKIP(3);
2350 GROW;
2351 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002352 if (count++ > 20) {
2353 count = 0;
2354 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002355 if (ctxt->instate == XML_PARSER_EOF)
2356 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002357 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002358 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002359 val = val * 16 + (CUR - '0');
2360 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2361 val = val * 16 + (CUR - 'a') + 10;
2362 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2363 val = val * 16 + (CUR - 'A') + 10;
2364 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002365 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002366 val = 0;
2367 break;
2368 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002369 if (val > 0x110000)
2370 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002371
Owen Taylor3473f882001-02-23 17:55:21 +00002372 NEXT;
2373 count++;
2374 }
2375 if (RAW == ';') {
2376 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002377 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002378 ctxt->input->cur++;
2379 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002380 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002381 SKIP(2);
2382 GROW;
2383 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002384 if (count++ > 20) {
2385 count = 0;
2386 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002387 if (ctxt->instate == XML_PARSER_EOF)
2388 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002389 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002390 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002391 val = val * 10 + (CUR - '0');
2392 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002393 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002394 val = 0;
2395 break;
2396 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002397 if (val > 0x110000)
2398 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002399
Owen Taylor3473f882001-02-23 17:55:21 +00002400 NEXT;
2401 count++;
2402 }
2403 if (RAW == ';') {
2404 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002405 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002406 ctxt->input->cur++;
2407 }
2408 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002409 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002410 }
2411
2412 /*
2413 * [ WFC: Legal Character ]
2414 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002415 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002416 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002417 if (val >= 0x110000) {
2418 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2419 "xmlParseCharRef: character reference out of bounds\n",
2420 val);
2421 } else if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002422 return(val);
2423 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002424 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2425 "xmlParseCharRef: invalid xmlChar value %d\n",
2426 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002427 }
2428 return(0);
2429}
2430
2431/**
2432 * xmlParseStringCharRef:
2433 * @ctxt: an XML parser context
2434 * @str: a pointer to an index in the string
2435 *
2436 * parse Reference declarations, variant parsing from a string rather
2437 * than an an input flow.
2438 *
2439 * [66] CharRef ::= '&#' [0-9]+ ';' |
2440 * '&#x' [0-9a-fA-F]+ ';'
2441 *
2442 * [ WFC: Legal Character ]
2443 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002444 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002445 *
2446 * Returns the value parsed (as an int), 0 in case of error, str will be
2447 * updated to the current value of the index
2448 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002449static int
Owen Taylor3473f882001-02-23 17:55:21 +00002450xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2451 const xmlChar *ptr;
2452 xmlChar cur;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002453 int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002454
2455 if ((str == NULL) || (*str == NULL)) return(0);
2456 ptr = *str;
2457 cur = *ptr;
2458 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2459 ptr += 3;
2460 cur = *ptr;
2461 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002462 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002463 val = val * 16 + (cur - '0');
2464 else if ((cur >= 'a') && (cur <= 'f'))
2465 val = val * 16 + (cur - 'a') + 10;
2466 else if ((cur >= 'A') && (cur <= 'F'))
2467 val = val * 16 + (cur - 'A') + 10;
2468 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002469 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002470 val = 0;
2471 break;
2472 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002473 if (val > 0x110000)
2474 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002475
Owen Taylor3473f882001-02-23 17:55:21 +00002476 ptr++;
2477 cur = *ptr;
2478 }
2479 if (cur == ';')
2480 ptr++;
2481 } else if ((cur == '&') && (ptr[1] == '#')){
2482 ptr += 2;
2483 cur = *ptr;
2484 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002485 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002486 val = val * 10 + (cur - '0');
2487 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002488 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002489 val = 0;
2490 break;
2491 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002492 if (val > 0x110000)
2493 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002494
Owen Taylor3473f882001-02-23 17:55:21 +00002495 ptr++;
2496 cur = *ptr;
2497 }
2498 if (cur == ';')
2499 ptr++;
2500 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002501 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002502 return(0);
2503 }
2504 *str = ptr;
2505
2506 /*
2507 * [ WFC: Legal Character ]
2508 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002509 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002510 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002511 if (val >= 0x110000) {
2512 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2513 "xmlParseStringCharRef: character reference out of bounds\n",
2514 val);
2515 } else if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002516 return(val);
2517 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002518 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2519 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2520 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002521 }
2522 return(0);
2523}
2524
2525/**
2526 * xmlParserHandlePEReference:
2527 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002528 *
Owen Taylor3473f882001-02-23 17:55:21 +00002529 * [69] PEReference ::= '%' Name ';'
2530 *
2531 * [ WFC: No Recursion ]
2532 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002533 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002534 *
2535 * [ WFC: Entity Declared ]
2536 * In a document without any DTD, a document with only an internal DTD
2537 * subset which contains no parameter entity references, or a document
2538 * with "standalone='yes'", ... ... The declaration of a parameter
2539 * entity must precede any reference to it...
2540 *
2541 * [ VC: Entity Declared ]
2542 * In a document with an external subset or external parameter entities
2543 * with "standalone='no'", ... ... The declaration of a parameter entity
2544 * must precede any reference to it...
2545 *
2546 * [ WFC: In DTD ]
2547 * Parameter-entity references may only appear in the DTD.
2548 * NOTE: misleading but this is handled.
2549 *
2550 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002551 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002552 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002553 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002554 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002555 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002556 */
2557void
2558xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00002559 switch(ctxt->instate) {
2560 case XML_PARSER_CDATA_SECTION:
2561 return;
2562 case XML_PARSER_COMMENT:
2563 return;
2564 case XML_PARSER_START_TAG:
2565 return;
2566 case XML_PARSER_END_TAG:
2567 return;
2568 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002569 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002570 return;
2571 case XML_PARSER_PROLOG:
2572 case XML_PARSER_START:
2573 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002574 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002575 return;
2576 case XML_PARSER_ENTITY_DECL:
2577 case XML_PARSER_CONTENT:
2578 case XML_PARSER_ATTRIBUTE_VALUE:
2579 case XML_PARSER_PI:
2580 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002581 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002582 /* we just ignore it there */
2583 return;
2584 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002585 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002586 return;
2587 case XML_PARSER_ENTITY_VALUE:
2588 /*
2589 * NOTE: in the case of entity values, we don't do the
2590 * substitution here since we need the literal
2591 * entity value to be able to save the internal
2592 * subset of the document.
2593 * This will be handled by xmlStringDecodeEntities
2594 */
2595 return;
2596 case XML_PARSER_DTD:
2597 /*
2598 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2599 * In the internal DTD subset, parameter-entity references
2600 * can occur only where markup declarations can occur, not
2601 * within markup declarations.
2602 * In that case this is handled in xmlParseMarkupDecl
2603 */
2604 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2605 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002606 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002607 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002608 break;
2609 case XML_PARSER_IGNORE:
2610 return;
2611 }
2612
Nick Wellnhofer03904152017-06-05 21:16:00 +02002613 xmlParsePEReference(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00002614}
2615
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t holding the current capacity.
 * mem_error: is expected to be a label in the calling function that
 * handles memory allocation failures; it is also the target when the new
 * size wraps around. On failure the original buffer is left untouched.
 * n is parenthesized so that expression arguments are added as a whole.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2630
2631/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002632 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002633 * @ctxt: the parser context
2634 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002635 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002636 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2637 * @end: an end marker xmlChar, 0 if none
2638 * @end2: an end marker xmlChar, 0 if none
2639 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002640 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002641 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002642 *
2643 * [67] Reference ::= EntityRef | CharRef
2644 *
2645 * [69] PEReference ::= '%' Name ';'
2646 *
2647 * Returns A newly allocated string with the substitution done. The caller
2648 * must deallocate it !
2649 */
2650xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002651xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2652 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002653 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002654 size_t buffer_size = 0;
2655 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002656
2657 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002658 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002659 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002660 xmlEntityPtr ent;
2661 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002662
Daniel Veillarda82b1822004-11-08 16:24:57 +00002663 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002664 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002665 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002666
Daniel Veillard0161e632008-08-28 15:36:32 +00002667 if (((ctxt->depth > 40) &&
2668 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2669 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002670 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002671 return(NULL);
2672 }
2673
2674 /*
2675 * allocate a translation buffer.
2676 */
2677 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002678 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002679 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002680
2681 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002682 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002683 * we are operating on already parsed values.
2684 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002685 if (str < last)
2686 c = CUR_SCHAR(str, l);
2687 else
2688 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002689 while ((c != 0) && (c != end) && /* non input consuming loop */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002690 (c != end2) && (c != end3) &&
2691 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002692
2693 if (c == 0) break;
2694 if ((c == '&') && (str[1] == '#')) {
2695 int val = xmlParseStringCharRef(ctxt, &str);
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002696 if (val == 0)
2697 goto int_error;
2698 COPY_BUF(0,buffer,nbchars,val);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002699 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002700 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002701 }
Owen Taylor3473f882001-02-23 17:55:21 +00002702 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2703 if (xmlParserDebugEntities)
2704 xmlGenericError(xmlGenericErrorContext,
2705 "String decoding Entity Reference: %.30s\n",
2706 str);
2707 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002708 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002709 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002710 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002711 if ((ent != NULL) &&
2712 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2713 if (ent->content != NULL) {
2714 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002715 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002716 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002717 }
Owen Taylor3473f882001-02-23 17:55:21 +00002718 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002719 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2720 "predefined entity has no content\n");
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002721 goto int_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002722 }
2723 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002724 ctxt->depth++;
2725 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2726 0, 0, 0);
2727 ctxt->depth--;
Elliott Hughes5cefca72021-05-06 13:23:15 -07002728 if (rep == NULL) {
2729 ent->content[0] = 0;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002730 goto int_error;
Elliott Hughes5cefca72021-05-06 13:23:15 -07002731 }
Daniel Veillard0161e632008-08-28 15:36:32 +00002732
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002733 current = rep;
2734 while (*current != 0) { /* non input consuming loop */
2735 buffer[nbchars++] = *current++;
2736 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2737 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2738 goto int_error;
2739 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2740 }
2741 }
2742 xmlFree(rep);
2743 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002744 } else if (ent != NULL) {
2745 int i = xmlStrlen(ent->name);
2746 const xmlChar *cur = ent->name;
2747
2748 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002749 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002750 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002751 }
2752 for (;i > 0;i--)
2753 buffer[nbchars++] = *cur++;
2754 buffer[nbchars++] = ';';
2755 }
2756 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2757 if (xmlParserDebugEntities)
2758 xmlGenericError(xmlGenericErrorContext,
2759 "String decoding PE Reference: %.30s\n", str);
2760 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002761 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002762 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002763 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002764 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002765 if (ent->content == NULL) {
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002766 /*
2767 * Note: external parsed entities will not be loaded,
2768 * it is not required for a non-validating parser to
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002769 * complete external PEReferences coming from the
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002770 * internal subset
2771 */
2772 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2773 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2774 (ctxt->validate != 0)) {
2775 xmlLoadEntityContent(ctxt, ent);
2776 } else {
2777 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2778 "not validating will not read content for PE entity %s\n",
2779 ent->name, NULL);
2780 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002781 }
Owen Taylor3473f882001-02-23 17:55:21 +00002782 ctxt->depth++;
2783 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2784 0, 0, 0);
2785 ctxt->depth--;
Elliott Hughes5cefca72021-05-06 13:23:15 -07002786 if (rep == NULL) {
2787 if (ent->content != NULL)
2788 ent->content[0] = 0;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002789 goto int_error;
Elliott Hughes5cefca72021-05-06 13:23:15 -07002790 }
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002791 current = rep;
2792 while (*current != 0) { /* non input consuming loop */
2793 buffer[nbchars++] = *current++;
2794 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2795 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2796 goto int_error;
2797 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2798 }
2799 }
2800 xmlFree(rep);
2801 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002802 }
2803 } else {
2804 COPY_BUF(l,buffer,nbchars,c);
2805 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002806 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2807 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002808 }
2809 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002810 if (str < last)
2811 c = CUR_SCHAR(str, l);
2812 else
2813 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002814 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002815 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002816 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002817
2818mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002819 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002820int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002821 if (rep != NULL)
2822 xmlFree(rep);
2823 if (buffer != NULL)
2824 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002825 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002826}
2827
Daniel Veillarde57ec792003-09-10 10:50:59 +00002828/**
2829 * xmlStringDecodeEntities:
2830 * @ctxt: the parser context
2831 * @str: the input string
2832 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2833 * @end: an end marker xmlChar, 0 if none
2834 * @end2: an end marker xmlChar, 0 if none
2835 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002836 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002837 * Takes a entity string content and process to do the adequate substitutions.
2838 *
2839 * [67] Reference ::= EntityRef | CharRef
2840 *
2841 * [69] PEReference ::= '%' Name ';'
2842 *
2843 * Returns A newly allocated string with the substitution done. The caller
2844 * must deallocate it !
2845 */
2846xmlChar *
2847xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2848 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002849 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002850 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2851 end, end2, end3));
2852}
Owen Taylor3473f882001-02-23 17:55:21 +00002853
2854/************************************************************************
2855 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002856 * Commodity functions, cleanup needed ? *
2857 * *
2858 ************************************************************************/
2859
2860/**
2861 * areBlanks:
2862 * @ctxt: an XML parser context
2863 * @str: a xmlChar *
2864 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002865 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002866 *
2867 * Is this a sequence of blank chars that one can ignore ?
2868 *
2869 * Returns 1 if ignorable 0 otherwise.
2870 */
2871
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002872static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2873 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002874 int i, ret;
2875 xmlNodePtr lastChild;
2876
Daniel Veillard05c13a22001-09-09 08:38:09 +00002877 /*
2878 * Don't spend time trying to differentiate them, the same callback is
2879 * used !
2880 */
2881 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002882 return(0);
2883
Owen Taylor3473f882001-02-23 17:55:21 +00002884 /*
2885 * Check for xml:space value.
2886 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002887 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2888 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002889 return(0);
2890
2891 /*
2892 * Check that the string is made of blanks
2893 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002894 if (blank_chars == 0) {
2895 for (i = 0;i < len;i++)
2896 if (!(IS_BLANK_CH(str[i]))) return(0);
2897 }
Owen Taylor3473f882001-02-23 17:55:21 +00002898
2899 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002900 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002901 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002902 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002903 if (ctxt->myDoc != NULL) {
2904 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2905 if (ret == 0) return(1);
2906 if (ret == 1) return(0);
2907 }
2908
2909 /*
2910 * Otherwise, heuristic :-\
2911 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002912 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002913 if ((ctxt->node->children == NULL) &&
2914 (RAW == '<') && (NXT(1) == '/')) return(0);
2915
2916 lastChild = xmlGetLastChild(ctxt->node);
2917 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002918 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2919 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002920 } else if (xmlNodeIsText(lastChild))
2921 return(0);
2922 else if ((ctxt->node->children != NULL) &&
2923 (xmlNodeIsText(ctxt->node->children)))
2924 return(0);
2925 return(1);
2926}
2927
Owen Taylor3473f882001-02-23 17:55:21 +00002928/************************************************************************
2929 * *
2930 * Extra stuff for namespace support *
2931 * Relates to http://www.w3.org/TR/WD-xml-names *
2932 * *
2933 ************************************************************************/
2934
/**
 * xmlSplitQName:
 * @ctxt:  an XML parser context
 * @name:  the UTF8 encoded qualified name to split
 * @prefix:  a xmlChar **, receives the prefix string or NULL
 *
 * parse an UTF8 encoded XML qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * Names starting with "xml:" (unless XML_XML_NAMESPACE is defined), names
 * starting with ':' and names whose only ':' is the last character are
 * not split: a copy of the whole @name is returned and *@prefix is NULL.
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any. NULL is returned if @prefix or @name is
 *   NULL or if a buffer allocation fails.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    xmlChar buf[XML_MAX_NAMELEN + 5];   /* on-stack storage for short names */
    xmlChar *buffer = NULL;             /* heap storage once buf is too small */
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
        return(xmlStrdup(name));
#endif

    /* nasty but well-formed */
    if (cur[0] == ':')
        return(xmlStrdup(name));

    /*
     * First pass: copy bytes up to the first ':' or the end of the string.
     * Note that cur always points one byte past the one held in c.
     */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
        buf[len++] = c;
        c = *cur++;
    }
    if (len >= max) {
        /*
         * Okay someone managed to make a huge name, so he's ready to pay
         * for the processing speed.
         */
        max = len * 2;

        buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
        if (buffer == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        memcpy(buffer, buf, len);
        while ((c != 0) && (c != ':')) { /* tested bigname.xml */
            /* keep some slack so the final terminator always fits */
            if (len + 10 > max) {
                xmlChar *tmp;

                max *= 2;
                tmp = (xmlChar *) xmlRealloc(buffer,
                                             max * sizeof(xmlChar));
                if (tmp == NULL) {
                    xmlFree(buffer);
                    xmlErrMemory(ctxt, NULL);
                    return(NULL);
                }
                buffer = tmp;
            }
            buffer[len++] = c;
            c = *cur++;
        }
        buffer[len] = 0;
    }

    /* a trailing ':' with nothing behind it: do not split */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
            xmlFree(buffer);
        *prefix = NULL;
        return(xmlStrdup(name));
    }

    if (buffer == NULL)
        ret = xmlStrndup(buf, len);
    else {
        /* hand the heap buffer over and reset the state for the second pass */
        ret = buffer;
        buffer = NULL;
        max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
        /* what was collected so far is the prefix, now collect the local part */
        c = *cur;
        *prefix = ret;
        if (c == 0) {
            return(xmlStrndup(BAD_CAST "", 0));
        }
        len = 0;

        /*
         * Check that the first character is proper to start
         * a new name
         */
        if (!(((c >= 0x61) && (c <= 0x7A)) ||
              ((c >= 0x41) && (c <= 0x5A)) ||
              (c == '_') || (c == ':'))) {
            int l;
            int first = CUR_SCHAR(cur, l);

            /* reported as an error, the split nevertheless goes on */
            if (!IS_LETTER(first) && (first != '_')) {
                xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
                            "Name %s is not XML Namespace compliant\n",
                                  name);
            }
        }
        cur++;

        while ((c != 0) && (len < max)) { /* tested bigname2.xml */
            buf[len++] = c;
            c = *cur++;
        }
        if (len >= max) {
            /*
             * Okay someone managed to make a huge name, so he's ready to pay
             * for the processing speed.
             */
            max = len * 2;

            buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
            if (buffer == NULL) {
                /* NOTE(review): *prefix still holds the prefix string here */
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            memcpy(buffer, buf, len);
            while (c != 0) { /* tested bigname2.xml */
                if (len + 10 > max) {
                    xmlChar *tmp;

                    max *= 2;
                    tmp = (xmlChar *) xmlRealloc(buffer,
                                                 max * sizeof(xmlChar));
                    if (tmp == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buffer);
                        return(NULL);
                    }
                    buffer = tmp;
                }
                buffer[len++] = c;
                c = *cur++;
            }
            buffer[len] = 0;
        }

        if (buffer == NULL)
            ret = xmlStrndup(buf, len);
        else {
            ret = buffer;
        }
    }

    return(ret);
}
3105
3106/************************************************************************
3107 * *
3108 * The parser itself *
3109 * Relates to http://www.w3.org/TR/REC-xml *
3110 * *
3111 ************************************************************************/
3112
Daniel Veillard34e3f642008-07-29 09:02:27 +00003113/************************************************************************
3114 * *
3115 * Routines to parse Name, NCName and NmToken *
3116 * *
3117 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled in when
 * DEBUG is defined. Each one is incremented on entry of the routine it
 * is named after (presumably also true for the routines not visible in
 * this part of the file).
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3126
Daniel Veillard34e3f642008-07-29 09:02:27 +00003127/*
3128 * The two following functions are related to the change of accepted
3129 * characters for Name and NmToken in the Revision 5 of XML-1.0
3130 * They correspond to the modified production [4] and the new production [4a]
3131 * changes in that revision. Also note that the macros used for the
3132 * productions Letter, Digit, CombiningChar and Extender are not needed
3133 * anymore.
3134 * We still keep compatibility to pre-revision5 parsing semantic if the
3135 * new XML_PARSE_OLD10 option is given to the parser.
3136 */
3137static int
3138xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3139 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3140 /*
3141 * Use the new checks of production [4] [4a] amd [5] of the
3142 * Update 5 of XML-1.0
3143 */
3144 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3145 (((c >= 'a') && (c <= 'z')) ||
3146 ((c >= 'A') && (c <= 'Z')) ||
3147 (c == '_') || (c == ':') ||
3148 ((c >= 0xC0) && (c <= 0xD6)) ||
3149 ((c >= 0xD8) && (c <= 0xF6)) ||
3150 ((c >= 0xF8) && (c <= 0x2FF)) ||
3151 ((c >= 0x370) && (c <= 0x37D)) ||
3152 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3153 ((c >= 0x200C) && (c <= 0x200D)) ||
3154 ((c >= 0x2070) && (c <= 0x218F)) ||
3155 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3156 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3157 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3158 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3159 ((c >= 0x10000) && (c <= 0xEFFFF))))
3160 return(1);
3161 } else {
3162 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3163 return(1);
3164 }
3165 return(0);
3166}
3167
3168static int
3169xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3170 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3171 /*
3172 * Use the new checks of production [4] [4a] amd [5] of the
3173 * Update 5 of XML-1.0
3174 */
3175 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3176 (((c >= 'a') && (c <= 'z')) ||
3177 ((c >= 'A') && (c <= 'Z')) ||
3178 ((c >= '0') && (c <= '9')) || /* !start */
3179 (c == '_') || (c == ':') ||
3180 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3181 ((c >= 0xC0) && (c <= 0xD6)) ||
3182 ((c >= 0xD8) && (c <= 0xF6)) ||
3183 ((c >= 0xF8) && (c <= 0x2FF)) ||
3184 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3185 ((c >= 0x370) && (c <= 0x37D)) ||
3186 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3187 ((c >= 0x200C) && (c <= 0x200D)) ||
3188 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3189 ((c >= 0x2070) && (c <= 0x218F)) ||
3190 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3191 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3192 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3193 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3194 ((c >= 0x10000) && (c <= 0xEFFFF))))
3195 return(1);
3196 } else {
3197 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3198 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003199 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003200 (IS_COMBINING(c)) ||
3201 (IS_EXTENDER(c)))
3202 return(1);
3203 }
3204 return(0);
3205}
3206
/*
 * Forward declaration of a static helper; its definition is not located
 * in this part of the file.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                          int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003209
/*
 * xmlParseNameComplex:
 * @ctxt:  an XML parser context
 *
 * Parse a Name from the current input, one decoded character at a time.
 * xmlParseName() falls back to this routine when its ASCII-only scan
 * cannot conclude.
 *
 * Returns the Name as looked up in the parser dictionary, or NULL if the
 *         input does not start with a valid name start character, if the
 *         parser reached the EOF state, if the name is too long or if the
 *         input buffer changed unexpectedly.
 */
static const xmlChar *
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
    int len = 0, l;     /* len: bytes consumed so far, l: size of current char */
    int c;
    int count = 0;      /* characters consumed since the last GROW */

#ifdef DEBUG
    nbParseNameComplex++;
#endif

    /*
     * Handler for more complex cases
     */
    GROW;
    if (ctxt->instate == XML_PARSER_EOF)
        return(NULL);
    c = CUR_CHAR(l);
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
        /*
         * Use the new checks of production [4] [4a] and [5] of the
         * Update 5 of XML-1.0
         */
        if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
            (!(((c >= 'a') && (c <= 'z')) ||
               ((c >= 'A') && (c <= 'Z')) ||
               (c == '_') || (c == ':') ||
               ((c >= 0xC0) && (c <= 0xD6)) ||
               ((c >= 0xD8) && (c <= 0xF6)) ||
               ((c >= 0xF8) && (c <= 0x2FF)) ||
               ((c >= 0x370) && (c <= 0x37D)) ||
               ((c >= 0x37F) && (c <= 0x1FFF)) ||
               ((c >= 0x200C) && (c <= 0x200D)) ||
               ((c >= 0x2070) && (c <= 0x218F)) ||
               ((c >= 0x2C00) && (c <= 0x2FEF)) ||
               ((c >= 0x3001) && (c <= 0xD7FF)) ||
               ((c >= 0xF900) && (c <= 0xFDCF)) ||
               ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
               ((c >= 0x10000) && (c <= 0xEFFFF))))) {
            return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);
        while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
               (((c >= 'a') && (c <= 'z')) ||
                ((c >= 'A') && (c <= 'Z')) ||
                ((c >= '0') && (c <= '9')) || /* !start */
                (c == '_') || (c == ':') ||
                (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
                ((c >= 0xC0) && (c <= 0xD6)) ||
                ((c >= 0xD8) && (c <= 0xF6)) ||
                ((c >= 0xF8) && (c <= 0x2FF)) ||
                ((c >= 0x300) && (c <= 0x36F)) || /* !start */
                ((c >= 0x370) && (c <= 0x37D)) ||
                ((c >= 0x37F) && (c <= 0x1FFF)) ||
                ((c >= 0x200C) && (c <= 0x200D)) ||
                ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
                ((c >= 0x2070) && (c <= 0x218F)) ||
                ((c >= 0x2C00) && (c <= 0x2FEF)) ||
                ((c >= 0x3001) && (c <= 0xD7FF)) ||
                ((c >= 0xF900) && (c <= 0xFDCF)) ||
                ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
                ((c >= 0x10000) && (c <= 0xEFFFF))
                )) {
            /* refill the input from time to time on long names */
            if (count++ > XML_PARSER_CHUNK_SIZE) {
                count = 0;
                GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(NULL);
            }
            len += l;
            NEXTL(l);
            c = CUR_CHAR(l);
        }
    } else {
        /* XML_PARSE_OLD10: pre-revision 5 character classes */
        if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
            (!IS_LETTER(c) && (c != '_') &&
             (c != ':'))) {
            return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);

        while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
               ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
                (c == '.') || (c == '-') ||
                (c == '_') || (c == ':') ||
                (IS_COMBINING(c)) ||
                (IS_EXTENDER(c)))) {
            /* refill the input from time to time on long names */
            if (count++ > XML_PARSER_CHUNK_SIZE) {
                count = 0;
                GROW;
                if (ctxt->instate == XML_PARSER_EOF)
                    return(NULL);
            }
            len += l;
            NEXTL(l);
            c = CUR_CHAR(l);
        }
    }
    /* the length limit is only enforced when XML_PARSE_HUGE is not set */
    if ((len > XML_MAX_NAME_LENGTH) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
        return(NULL);
    }
    if (ctxt->input->cur - ctxt->input->base < len) {
        /*
         * There were a couple of bugs where PERefs lead to a change
         * of the buffer. Check the buffer size to avoid passing an invalid
         * pointer to xmlDictLookup.
         */
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
                    "unexpected change of input buffer");
        return (NULL);
    }
    /*
     * The name is the len bytes located right before the current position.
     * NOTE(review): the first lookup skips one extra byte when the cursor
     * sits on the '\n' of a "\r\n" pair; presumably this compensates for
     * the line ending handling of the input macros - confirm.
     */
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
}
3330
Owen Taylor3473f882001-02-23 17:55:21 +00003331/**
3332 * xmlParseName:
3333 * @ctxt: an XML parser context
3334 *
3335 * parse an XML name.
3336 *
3337 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3338 * CombiningChar | Extender
3339 *
3340 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3341 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003342 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003343 *
3344 * Returns the Name parsed or NULL
3345 */
3346
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003347const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003348xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003349 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003350 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003351 int count = 0;
3352
3353 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003354
Daniel Veillardc6561462009-03-25 10:22:31 +00003355#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003356 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003357#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003358
Daniel Veillard48b2f892001-02-25 16:11:03 +00003359 /*
3360 * Accelerator for simple ASCII names
3361 */
3362 in = ctxt->input->cur;
3363 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3364 ((*in >= 0x41) && (*in <= 0x5A)) ||
3365 (*in == '_') || (*in == ':')) {
3366 in++;
3367 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3368 ((*in >= 0x41) && (*in <= 0x5A)) ||
3369 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003370 (*in == '_') || (*in == '-') ||
3371 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003372 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003373 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003374 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003375 if ((count > XML_MAX_NAME_LENGTH) &&
3376 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3377 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3378 return(NULL);
3379 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003380 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003381 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003382 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003383 if (ret == NULL)
3384 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003385 return(ret);
3386 }
3387 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003388 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003389 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003390}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003391
/*
 * xmlParseNCNameComplex:
 * @ctxt:  an XML parser context
 *
 * Parse an NCName from the current input, one decoded character at a
 * time. This is the fallback used by xmlParseNCName().
 *
 * Returns the NCName as looked up in the parser dictionary, or NULL if
 *         the input does not start with a valid character, if the parser
 *         reached the EOF state or if the name is too long.
 */
static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
    int len = 0, l;     /* len: bytes consumed so far, l: size of current char */
    int c;
    int count = 0;      /* characters consumed since the last GROW */
    size_t startPosition = 0;

#ifdef DEBUG
    nbParseNCNameComplex++;
#endif

    /*
     * Handler for more complex cases
     */
    GROW;
    /*
     * Remember the start as an offset from BASE_PTR, not as a pointer; the
     * final lookup rebuilds the address from BASE_PTR (presumably because
     * GROW may move the buffer - confirm).
     */
    startPosition = CUR_PTR - BASE_PTR;
    c = CUR_CHAR(l);
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
        (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
        return(NULL);
    }

    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
           (xmlIsNameChar(ctxt, c) && (c != ':'))) {
        if (count++ > XML_PARSER_CHUNK_SIZE) {
            /* check the limit early to stop scanning oversized names */
            if ((len > XML_MAX_NAME_LENGTH) &&
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
                return(NULL);
            }
            count = 0;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);
        if (c == 0) {
            count = 0;
            /*
             * when shrinking to extend the buffer we really need to preserve
             * the part of the name we already parsed. Hence rolling back
             * by current length.
             */
            ctxt->input->cur -= l;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
            /* undo the roll back and read the character again */
            ctxt->input->cur += l;
            c = CUR_CHAR(l);
        }
    }
    if ((len > XML_MAX_NAME_LENGTH) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
        return(NULL);
    }
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
}
3452
3453/**
3454 * xmlParseNCName:
3455 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003456 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003457 *
3458 * parse an XML name.
3459 *
3460 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3461 * CombiningChar | Extender
3462 *
3463 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3464 *
3465 * Returns the Name parsed or NULL
3466 */
3467
3468static const xmlChar *
3469xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003470 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003471 const xmlChar *ret;
3472 int count = 0;
3473
Daniel Veillardc6561462009-03-25 10:22:31 +00003474#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003475 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003476#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003477
3478 /*
3479 * Accelerator for simple ASCII names
3480 */
3481 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003482 e = ctxt->input->end;
3483 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3484 ((*in >= 0x41) && (*in <= 0x5A)) ||
3485 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003486 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003487 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3488 ((*in >= 0x41) && (*in <= 0x5A)) ||
3489 ((*in >= 0x30) && (*in <= 0x39)) ||
3490 (*in == '_') || (*in == '-') ||
3491 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003492 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003493 if (in >= e)
3494 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003495 if ((*in > 0) && (*in < 0x80)) {
3496 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003497 if ((count > XML_MAX_NAME_LENGTH) &&
3498 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3499 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3500 return(NULL);
3501 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003502 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3503 ctxt->input->cur = in;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003504 ctxt->input->col += count;
3505 if (ret == NULL) {
3506 xmlErrMemory(ctxt, NULL);
3507 }
3508 return(ret);
3509 }
3510 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003511complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003512 return(xmlParseNCNameComplex(ctxt));
3513}
3514
Daniel Veillard46de64e2002-05-29 08:21:33 +00003515/**
3516 * xmlParseNameAndCompare:
3517 * @ctxt: an XML parser context
3518 *
3519 * parse an XML name and compares for match
3520 * (specialized for endtag parsing)
3521 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003522 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3523 * and the name for mismatch
3524 */
3525
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003526static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003527xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003528 register const xmlChar *cmp = other;
3529 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003530 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003531
3532 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003533 if (ctxt->instate == XML_PARSER_EOF)
3534 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003535
Daniel Veillard46de64e2002-05-29 08:21:33 +00003536 in = ctxt->input->cur;
3537 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003538 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003539 ++cmp;
3540 }
William M. Brack76e95df2003-10-18 16:20:14 +00003541 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003542 /* success */
Haibo Huangf0a546b2020-09-01 20:28:19 -07003543 ctxt->input->col += in - ctxt->input->cur;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003544 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003545 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003546 }
3547 /* failure (or end of input buffer), check with full function */
3548 ret = xmlParseName (ctxt);
Jan Pokornýbb654fe2016-04-13 16:56:07 +02003549 /* strings coming from the dictionary direct compare possible */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003550 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003551 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003552 }
3553 return ret;
3554}
3555
Owen Taylor3473f882001-02-23 17:55:21 +00003556/**
3557 * xmlParseStringName:
3558 * @ctxt: an XML parser context
3559 * @str: a pointer to the string pointer (IN/OUT)
3560 *
3561 * parse an XML name.
3562 *
3563 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3564 * CombiningChar | Extender
3565 *
3566 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3567 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003568 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003569 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003570 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003571 * is updated to the current location in the string.
3572 */
3573
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003574static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003575xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3576 xmlChar buf[XML_MAX_NAMELEN + 5];
3577 const xmlChar *cur = *str;
3578 int len = 0, l;
3579 int c;
3580
Daniel Veillardc6561462009-03-25 10:22:31 +00003581#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003582 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003583#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003584
Owen Taylor3473f882001-02-23 17:55:21 +00003585 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003586 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003587 return(NULL);
3588 }
3589
Daniel Veillard34e3f642008-07-29 09:02:27 +00003590 COPY_BUF(l,buf,len,c);
3591 cur += l;
3592 c = CUR_SCHAR(cur, l);
3593 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003594 COPY_BUF(l,buf,len,c);
3595 cur += l;
3596 c = CUR_SCHAR(cur, l);
3597 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3598 /*
3599 * Okay someone managed to make a huge name, so he's ready to pay
3600 * for the processing speed.
3601 */
3602 xmlChar *buffer;
3603 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003604
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003605 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003606 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003607 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003608 return(NULL);
3609 }
3610 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003611 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003612 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003613 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003614
3615 if ((len > XML_MAX_NAME_LENGTH) &&
3616 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3617 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3618 xmlFree(buffer);
3619 return(NULL);
3620 }
Owen Taylor3473f882001-02-23 17:55:21 +00003621 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003622 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003623 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003624 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003625 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003626 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003627 return(NULL);
3628 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003629 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003630 }
3631 COPY_BUF(l,buffer,len,c);
3632 cur += l;
3633 c = CUR_SCHAR(cur, l);
3634 }
3635 buffer[len] = 0;
3636 *str = cur;
3637 return(buffer);
3638 }
3639 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003640 if ((len > XML_MAX_NAME_LENGTH) &&
3641 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3642 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3643 return(NULL);
3644 }
Owen Taylor3473f882001-02-23 17:55:21 +00003645 *str = cur;
3646 return(xmlStrndup(buf, len));
3647}
3648
3649/**
3650 * xmlParseNmtoken:
3651 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003652 *
Owen Taylor3473f882001-02-23 17:55:21 +00003653 * parse an XML Nmtoken.
3654 *
3655 * [7] Nmtoken ::= (NameChar)+
3656 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003657 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003658 *
3659 * Returns the Nmtoken parsed or NULL
3660 */
3661
3662xmlChar *
3663xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3664 xmlChar buf[XML_MAX_NAMELEN + 5];
3665 int len = 0, l;
3666 int c;
3667 int count = 0;
3668
Daniel Veillardc6561462009-03-25 10:22:31 +00003669#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003670 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003671#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003672
Owen Taylor3473f882001-02-23 17:55:21 +00003673 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003674 if (ctxt->instate == XML_PARSER_EOF)
3675 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003676 c = CUR_CHAR(l);
3677
Daniel Veillard34e3f642008-07-29 09:02:27 +00003678 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003679 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003680 count = 0;
3681 GROW;
3682 }
3683 COPY_BUF(l,buf,len,c);
3684 NEXTL(l);
3685 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003686 if (c == 0) {
3687 count = 0;
3688 GROW;
3689 if (ctxt->instate == XML_PARSER_EOF)
3690 return(NULL);
3691 c = CUR_CHAR(l);
3692 }
Owen Taylor3473f882001-02-23 17:55:21 +00003693 if (len >= XML_MAX_NAMELEN) {
3694 /*
3695 * Okay someone managed to make a huge token, so he's ready to pay
3696 * for the processing speed.
3697 */
3698 xmlChar *buffer;
3699 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003700
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003701 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003702 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003703 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003704 return(NULL);
3705 }
3706 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003707 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003708 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003709 count = 0;
3710 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003711 if (ctxt->instate == XML_PARSER_EOF) {
3712 xmlFree(buffer);
3713 return(NULL);
3714 }
Owen Taylor3473f882001-02-23 17:55:21 +00003715 }
3716 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003717 xmlChar *tmp;
3718
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003719 if ((max > XML_MAX_NAME_LENGTH) &&
3720 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3721 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3722 xmlFree(buffer);
3723 return(NULL);
3724 }
Owen Taylor3473f882001-02-23 17:55:21 +00003725 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003726 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003727 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003728 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003729 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003730 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003731 return(NULL);
3732 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003733 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003734 }
3735 COPY_BUF(l,buffer,len,c);
3736 NEXTL(l);
3737 c = CUR_CHAR(l);
3738 }
3739 buffer[len] = 0;
3740 return(buffer);
3741 }
3742 }
3743 if (len == 0)
3744 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003745 if ((len > XML_MAX_NAME_LENGTH) &&
3746 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3747 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3748 return(NULL);
3749 }
Owen Taylor3473f882001-02-23 17:55:21 +00003750 return(xmlStrndup(buf, len));
3751}
3752
3753/**
3754 * xmlParseEntityValue:
3755 * @ctxt: an XML parser context
3756 * @orig: if non-NULL store a copy of the original entity value
3757 *
3758 * parse a value for ENTITY declarations
3759 *
3760 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3761 * "'" ([^%&'] | PEReference | Reference)* "'"
3762 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003763 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003764 */
3765
3766xmlChar *
3767xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3768 xmlChar *buf = NULL;
3769 int len = 0;
3770 int size = XML_PARSER_BUFFER_SIZE;
3771 int c, l;
3772 xmlChar stop;
3773 xmlChar *ret = NULL;
3774 const xmlChar *cur = NULL;
3775 xmlParserInputPtr input;
3776
3777 if (RAW == '"') stop = '"';
3778 else if (RAW == '\'') stop = '\'';
3779 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003780 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003781 return(NULL);
3782 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003783 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003784 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003785 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003786 return(NULL);
3787 }
3788
3789 /*
3790 * The content of the entity definition is copied in a buffer.
3791 */
3792
3793 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3794 input = ctxt->input;
3795 GROW;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003796 if (ctxt->instate == XML_PARSER_EOF)
3797 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003798 NEXT;
3799 c = CUR_CHAR(l);
3800 /*
3801 * NOTE: 4.4.5 Included in Literal
3802 * When a parameter entity reference appears in a literal entity
3803 * value, ... a single or double quote character in the replacement
3804 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003805 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003806 * In practice it means we stop the loop only when back at parsing
3807 * the initial entity and the quote is found
3808 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003809 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3810 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003811 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003812 xmlChar *tmp;
3813
Owen Taylor3473f882001-02-23 17:55:21 +00003814 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003815 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3816 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003817 xmlErrMemory(ctxt, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003818 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003819 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003820 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003821 }
3822 COPY_BUF(l,buf,len,c);
3823 NEXTL(l);
Owen Taylor3473f882001-02-23 17:55:21 +00003824
3825 GROW;
3826 c = CUR_CHAR(l);
3827 if (c == 0) {
3828 GROW;
3829 c = CUR_CHAR(l);
3830 }
3831 }
3832 buf[len] = 0;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003833 if (ctxt->instate == XML_PARSER_EOF)
3834 goto error;
3835 if (c != stop) {
3836 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3837 goto error;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003838 }
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003839 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00003840
3841 /*
3842 * Raise problem w.r.t. '&' and '%' being used in non-entities
3843 * reference constructs. Note Charref will be handled in
3844 * xmlStringDecodeEntities()
3845 */
3846 cur = buf;
3847 while (*cur != 0) { /* non input consuming */
3848 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3849 xmlChar *name;
3850 xmlChar tmp = *cur;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003851 int nameOk = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003852
3853 cur++;
3854 name = xmlParseStringName(ctxt, &cur);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003855 if (name != NULL) {
3856 nameOk = 1;
3857 xmlFree(name);
3858 }
3859 if ((nameOk == 0) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003860 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003861 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003862 tmp);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003863 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003864 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003865 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3866 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003867 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003868 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003869 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003870 if (*cur == 0)
3871 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003872 }
3873 cur++;
3874 }
3875
3876 /*
3877 * Then PEReference entities are substituted.
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003878 *
3879 * NOTE: 4.4.7 Bypassed
3880 * When a general entity reference appears in the EntityValue in
3881 * an entity declaration, it is bypassed and left as is.
3882 * so XML_SUBSTITUTE_REF is not set here.
Owen Taylor3473f882001-02-23 17:55:21 +00003883 */
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003884 ++ctxt->depth;
3885 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3886 0, 0, 0);
3887 --ctxt->depth;
3888 if (orig != NULL) {
3889 *orig = buf;
3890 buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003891 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003892
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003893error:
3894 if (buf != NULL)
3895 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003896 return(ret);
3897}
3898
3899/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003900 * xmlParseAttValueComplex:
3901 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003902 * @len: the resulting attribute len
Haibo Huangcfd91dc2020-07-30 23:01:33 -07003903 * @normalize: whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003904 *
3905 * parse a value for an attribute, this is the fallback function
3906 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003907 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003908 *
3909 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3910 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003911static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003912xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003913 xmlChar limit = 0;
3914 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003915 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003916 size_t len = 0;
3917 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003918 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003919 xmlChar *current = NULL;
3920 xmlEntityPtr ent;
3921
Owen Taylor3473f882001-02-23 17:55:21 +00003922 if (NXT(0) == '"') {
3923 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3924 limit = '"';
3925 NEXT;
3926 } else if (NXT(0) == '\'') {
3927 limit = '\'';
3928 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3929 NEXT;
3930 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003931 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003932 return(NULL);
3933 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003934
Owen Taylor3473f882001-02-23 17:55:21 +00003935 /*
3936 * allocate a translation buffer.
3937 */
3938 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003939 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003940 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003941
3942 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003943 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003944 */
3945 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003946 while (((NXT(0) != limit) && /* checked */
3947 (IS_CHAR(c)) && (c != '<')) &&
3948 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003949 /*
3950 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3951 * special option is given
3952 */
3953 if ((len > XML_MAX_TEXT_LENGTH) &&
3954 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3955 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003956 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003957 goto mem_error;
3958 }
Daniel Veillardfdc91562002-07-01 21:52:03 +00003959 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003960 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003961 if (NXT(1) == '#') {
3962 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003963
Owen Taylor3473f882001-02-23 17:55:21 +00003964 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003965 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003966 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003967 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003968 }
3969 buf[len++] = '&';
3970 } else {
3971 /*
3972 * The reparsing will be done in xmlStringGetNodeList()
3973 * called by the attribute() function in SAX.c
3974 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003975 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003976 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003977 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003978 buf[len++] = '&';
3979 buf[len++] = '#';
3980 buf[len++] = '3';
3981 buf[len++] = '8';
3982 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003983 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003984 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003985 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003986 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003987 }
Owen Taylor3473f882001-02-23 17:55:21 +00003988 len += xmlCopyChar(0, &buf[len], val);
3989 }
3990 } else {
3991 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003992 ctxt->nbentities++;
3993 if (ent != NULL)
3994 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003995 if ((ent != NULL) &&
3996 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003997 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003998 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003999 }
4000 if ((ctxt->replaceEntities == 0) &&
4001 (ent->content[0] == '&')) {
4002 buf[len++] = '&';
4003 buf[len++] = '#';
4004 buf[len++] = '3';
4005 buf[len++] = '8';
4006 buf[len++] = ';';
4007 } else {
4008 buf[len++] = ent->content[0];
4009 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004010 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004011 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004012 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
Peter Simons8f30bdf2016-04-15 11:56:55 +02004013 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004014 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004015 XML_SUBSTITUTE_REF,
4016 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004017 --ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004018 if (rep != NULL) {
4019 current = rep;
4020 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004021 if ((*current == 0xD) || (*current == 0xA) ||
4022 (*current == 0x9)) {
4023 buf[len++] = 0x20;
4024 current++;
4025 } else
4026 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004027 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004028 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004029 }
4030 }
4031 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004032 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004033 }
4034 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004035 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004036 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004037 }
Owen Taylor3473f882001-02-23 17:55:21 +00004038 if (ent->content != NULL)
4039 buf[len++] = ent->content[0];
4040 }
4041 } else if (ent != NULL) {
4042 int i = xmlStrlen(ent->name);
4043 const xmlChar *cur = ent->name;
4044
4045 /*
4046 * This may look absurd but is needed to detect
4047 * entities problems
4048 */
4049 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004050 (ent->content != NULL) && (ent->checked == 0)) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004051 unsigned long oldnbent = ctxt->nbentities, diff;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004052
Peter Simons8f30bdf2016-04-15 11:56:55 +02004053 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004054 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004055 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004056 --ctxt->depth;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004057
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004058 diff = ctxt->nbentities - oldnbent + 1;
4059 if (diff > INT_MAX / 2)
4060 diff = INT_MAX / 2;
4061 ent->checked = diff * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004062 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004063 if (xmlStrchr(rep, '<'))
4064 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004065 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004066 rep = NULL;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02004067 } else {
4068 ent->content[0] = 0;
4069 }
Owen Taylor3473f882001-02-23 17:55:21 +00004070 }
4071
4072 /*
4073 * Just output the reference
4074 */
4075 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004076 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004077 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004078 }
4079 for (;i > 0;i--)
4080 buf[len++] = *cur++;
4081 buf[len++] = ';';
4082 }
4083 }
4084 } else {
4085 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004086 if ((len != 0) || (!normalize)) {
4087 if ((!normalize) || (!in_space)) {
4088 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004089 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004090 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004091 }
4092 }
4093 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004094 }
4095 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004096 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004097 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004098 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004099 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004100 }
4101 }
4102 NEXTL(l);
4103 }
4104 GROW;
4105 c = CUR_CHAR(l);
4106 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004107 if (ctxt->instate == XML_PARSER_EOF)
4108 goto error;
4109
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004110 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004111 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004112 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004113 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004114 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004115 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004116 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004117 if ((c != 0) && (!IS_CHAR(c))) {
4118 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4119 "invalid character in attribute value\n");
4120 } else {
4121 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4122 "AttValue: ' expected\n");
4123 }
Owen Taylor3473f882001-02-23 17:55:21 +00004124 } else
4125 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004126
4127 /*
4128 * There we potentially risk an overflow, don't allow attribute value of
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004129 * length more than INT_MAX it is a very reasonable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004130 */
4131 if (len >= INT_MAX) {
4132 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004133 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004134 goto mem_error;
4135 }
4136
4137 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004138 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004139
4140mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004141 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004142error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004143 if (buf != NULL)
4144 xmlFree(buf);
4145 if (rep != NULL)
4146 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004147 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004148}
4149
4150/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004151 * xmlParseAttValue:
4152 * @ctxt: an XML parser context
4153 *
4154 * parse a value for an attribute
4155 * Note: the parser won't do substitution of entities here, this
4156 * will be handled later in xmlStringGetNodeList
4157 *
4158 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4159 * "'" ([^<&'] | Reference)* "'"
4160 *
4161 * 3.3.3 Attribute-Value Normalization:
4162 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004163 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004164 * - a character reference is processed by appending the referenced
4165 * character to the attribute value
4166 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004167 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004168 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4169 * appending #x20 to the normalized value, except that only a single
4170 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004171 * parsed entity or the literal entity value of an internal parsed entity
4172 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004173 * If the declared value is not CDATA, then the XML processor must further
4174 * process the normalized attribute value by discarding any leading and
4175 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004176 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004177 * All attributes for which no declaration has been read should be treated
4178 * by a non-validating parser as if declared CDATA.
4179 *
4180 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4181 */
4182
4183
4184xmlChar *
4185xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004186 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004187 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004188}
4189
4190/**
Owen Taylor3473f882001-02-23 17:55:21 +00004191 * xmlParseSystemLiteral:
4192 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004193 *
Owen Taylor3473f882001-02-23 17:55:21 +00004194 * parse an XML Literal
4195 *
4196 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4197 *
4198 * Returns the SystemLiteral parsed or NULL
4199 */
4200
4201xmlChar *
4202xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4203 xmlChar *buf = NULL;
4204 int len = 0;
4205 int size = XML_PARSER_BUFFER_SIZE;
4206 int cur, l;
4207 xmlChar stop;
4208 int state = ctxt->instate;
4209 int count = 0;
4210
4211 SHRINK;
4212 if (RAW == '"') {
4213 NEXT;
4214 stop = '"';
4215 } else if (RAW == '\'') {
4216 NEXT;
4217 stop = '\'';
4218 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004219 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004220 return(NULL);
4221 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004222
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004223 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004224 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004225 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004226 return(NULL);
4227 }
4228 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4229 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004230 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004231 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004232 xmlChar *tmp;
4233
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004234 if ((size > XML_MAX_NAME_LENGTH) &&
4235 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4236 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4237 xmlFree(buf);
4238 ctxt->instate = (xmlParserInputState) state;
4239 return(NULL);
4240 }
Owen Taylor3473f882001-02-23 17:55:21 +00004241 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004242 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4243 if (tmp == NULL) {
4244 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004245 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004246 ctxt->instate = (xmlParserInputState) state;
4247 return(NULL);
4248 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004249 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004250 }
4251 count++;
4252 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08004253 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00004254 GROW;
4255 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004256 if (ctxt->instate == XML_PARSER_EOF) {
4257 xmlFree(buf);
4258 return(NULL);
4259 }
Owen Taylor3473f882001-02-23 17:55:21 +00004260 }
4261 COPY_BUF(l,buf,len,cur);
4262 NEXTL(l);
4263 cur = CUR_CHAR(l);
4264 if (cur == 0) {
4265 GROW;
4266 SHRINK;
4267 cur = CUR_CHAR(l);
4268 }
4269 }
4270 buf[len] = 0;
4271 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004272 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004273 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004274 } else {
4275 NEXT;
4276 }
4277 return(buf);
4278}
4279
4280/**
4281 * xmlParsePubidLiteral:
4282 * @ctxt: an XML parser context
4283 *
4284 * parse an XML public literal
4285 *
4286 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4287 *
4288 * Returns the PubidLiteral parsed or NULL.
4289 */
4290
4291xmlChar *
4292xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4293 xmlChar *buf = NULL;
4294 int len = 0;
4295 int size = XML_PARSER_BUFFER_SIZE;
4296 xmlChar cur;
4297 xmlChar stop;
4298 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004299 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004300
4301 SHRINK;
4302 if (RAW == '"') {
4303 NEXT;
4304 stop = '"';
4305 } else if (RAW == '\'') {
4306 NEXT;
4307 stop = '\'';
4308 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004309 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004310 return(NULL);
4311 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004312 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004313 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004314 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004315 return(NULL);
4316 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004317 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004318 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004319 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004320 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004321 xmlChar *tmp;
4322
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004323 if ((size > XML_MAX_NAME_LENGTH) &&
4324 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4325 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4326 xmlFree(buf);
4327 return(NULL);
4328 }
Owen Taylor3473f882001-02-23 17:55:21 +00004329 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004330 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4331 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004332 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004333 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004334 return(NULL);
4335 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004336 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004337 }
4338 buf[len++] = cur;
4339 count++;
4340 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08004341 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00004342 GROW;
4343 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004344 if (ctxt->instate == XML_PARSER_EOF) {
4345 xmlFree(buf);
4346 return(NULL);
4347 }
Owen Taylor3473f882001-02-23 17:55:21 +00004348 }
4349 NEXT;
4350 cur = CUR;
4351 if (cur == 0) {
4352 GROW;
4353 SHRINK;
4354 cur = CUR;
4355 }
4356 }
4357 buf[len] = 0;
4358 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004359 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004360 } else {
4361 NEXT;
4362 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004363 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004364 return(buf);
4365}
4366
Daniel Veillard8ed10722009-08-20 19:17:36 +02004367static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004368
/*
 * Lookup table driving the inner scanning loop of xmlParseCharData().
 *
 * The table is indexed by the raw input byte.  A non-zero entry (the byte
 * value itself) lets the loop keep advancing; a zero entry stops it.
 * Zero entries are: every control byte except TAB (0x09), the characters
 * '&' (0x26), '<' (0x3C) and ']' (0x5D), and every byte >= 0x80.
 * Each row below covers 16 consecutive byte values.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB passes; LF and CR are handled by the caller */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ASCII bytes always stop the scan */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4406
Owen Taylor3473f882001-02-23 17:55:21 +00004407/**
4408 * xmlParseCharData:
4409 * @ctxt: an XML parser context
4410 * @cdata: int indicating whether we are within a CDATA section
4411 *
4412 * parse a CharData section.
4413 * if we are within a CDATA section ']]>' marks an end of section.
4414 *
4415 * The right angle bracket (>) may be represented using the string "&gt;",
4416 * and must, for compatibility, be escaped using "&gt;" or a character
4417 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004418 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004419 *
4420 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4421 */
4422
4423void
4424xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004425 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004426 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004427 int line = ctxt->input->line;
4428 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004429 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004430
4431 SHRINK;
4432 GROW;
4433 /*
4434 * Accelerated common case where input don't need to be
4435 * modified before passing it to the handler.
4436 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004437 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004438 in = ctxt->input->cur;
4439 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004440get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004441 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004442 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004443 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004444 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004445 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004446 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004447 goto get_more_space;
4448 }
4449 if (*in == '<') {
4450 nbchar = in - ctxt->input->cur;
4451 if (nbchar > 0) {
4452 const xmlChar *tmp = ctxt->input->cur;
4453 ctxt->input->cur = in;
4454
Daniel Veillard34099b42004-11-04 17:34:35 +00004455 if ((ctxt->sax != NULL) &&
4456 (ctxt->sax->ignorableWhitespace !=
4457 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004458 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004459 if (ctxt->sax->ignorableWhitespace != NULL)
4460 ctxt->sax->ignorableWhitespace(ctxt->userData,
4461 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004462 } else {
4463 if (ctxt->sax->characters != NULL)
4464 ctxt->sax->characters(ctxt->userData,
4465 tmp, nbchar);
4466 if (*ctxt->space == -1)
4467 *ctxt->space = -2;
4468 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004469 } else if ((ctxt->sax != NULL) &&
4470 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004471 ctxt->sax->characters(ctxt->userData,
4472 tmp, nbchar);
4473 }
4474 }
4475 return;
4476 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004477
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004478get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004479 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004480 while (test_char_data[*in]) {
4481 in++;
4482 ccol++;
4483 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004484 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004485 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004486 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004487 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004488 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004489 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004490 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004491 }
4492 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004493 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004494 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Nick Wellnhofer52ceced2017-07-01 17:49:30 +02004495 ctxt->input->cur = in + 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004496 return;
4497 }
4498 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004499 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004500 goto get_more;
4501 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004502 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004503 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004504 if ((ctxt->sax != NULL) &&
4505 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004506 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004507 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004508 const xmlChar *tmp = ctxt->input->cur;
4509 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004510
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004511 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004512 if (ctxt->sax->ignorableWhitespace != NULL)
4513 ctxt->sax->ignorableWhitespace(ctxt->userData,
4514 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004515 } else {
4516 if (ctxt->sax->characters != NULL)
4517 ctxt->sax->characters(ctxt->userData,
4518 tmp, nbchar);
4519 if (*ctxt->space == -1)
4520 *ctxt->space = -2;
4521 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004522 line = ctxt->input->line;
4523 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004524 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004525 if (ctxt->sax->characters != NULL)
4526 ctxt->sax->characters(ctxt->userData,
4527 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004528 line = ctxt->input->line;
4529 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004530 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004531 /* something really bad happened in the SAX callback */
4532 if (ctxt->instate != XML_PARSER_CONTENT)
4533 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004534 }
4535 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004536 if (*in == 0xD) {
4537 in++;
4538 if (*in == 0xA) {
4539 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004540 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004541 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004542 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004543 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004544 in--;
4545 }
4546 if (*in == '<') {
4547 return;
4548 }
4549 if (*in == '&') {
4550 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004551 }
4552 SHRINK;
4553 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004554 if (ctxt->instate == XML_PARSER_EOF)
4555 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004556 in = ctxt->input->cur;
Haibo Huangd23e46c2020-10-28 22:26:09 -07004557 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004558 nbchar = 0;
4559 }
Daniel Veillard50582112001-03-26 22:52:16 +00004560 ctxt->input->line = line;
4561 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004562 xmlParseCharDataComplex(ctxt, cdata);
4563}
4564
Daniel Veillard01c13b52002-12-10 15:19:08 +00004565/**
4566 * xmlParseCharDataComplex:
4567 * @ctxt: an XML parser context
4568 * @cdata: int indicating whether we are within a CDATA section
4569 *
4570 * parse a CharData section.this is the fallback function
4571 * of xmlParseCharData() when the parsing requires handling
4572 * of non-ASCII characters.
4573 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004574static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004575xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004576 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4577 int nbchar = 0;
4578 int cur, l;
4579 int count = 0;
4580
4581 SHRINK;
4582 GROW;
4583 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004584 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004585 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004586 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004587 if ((cur == ']') && (NXT(1) == ']') &&
4588 (NXT(2) == '>')) {
4589 if (cdata) break;
4590 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004591 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004592 }
4593 }
4594 COPY_BUF(l,buf,nbchar,cur);
Elliott Hughesecdab2a2022-02-23 14:33:50 -08004595 /* move current position before possible calling of ctxt->sax->characters */
4596 NEXTL(l);
4597 cur = CUR_CHAR(l);
Owen Taylor3473f882001-02-23 17:55:21 +00004598 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004599 buf[nbchar] = 0;
4600
Owen Taylor3473f882001-02-23 17:55:21 +00004601 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004602 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004603 */
4604 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004605 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004606 if (ctxt->sax->ignorableWhitespace != NULL)
4607 ctxt->sax->ignorableWhitespace(ctxt->userData,
4608 buf, nbchar);
4609 } else {
4610 if (ctxt->sax->characters != NULL)
4611 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004612 if ((ctxt->sax->characters !=
4613 ctxt->sax->ignorableWhitespace) &&
4614 (*ctxt->space == -1))
4615 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004616 }
4617 }
4618 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004619 /* something really bad happened in the SAX callback */
4620 if (ctxt->instate != XML_PARSER_CONTENT)
4621 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004622 }
4623 count++;
4624 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08004625 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00004626 GROW;
4627 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004628 if (ctxt->instate == XML_PARSER_EOF)
4629 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004630 }
Owen Taylor3473f882001-02-23 17:55:21 +00004631 }
4632 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004633 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004634 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004635 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004636 */
4637 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004638 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004639 if (ctxt->sax->ignorableWhitespace != NULL)
4640 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4641 } else {
4642 if (ctxt->sax->characters != NULL)
4643 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004644 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4645 (*ctxt->space == -1))
4646 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004647 }
4648 }
4649 }
Nick Wellnhofer69936b12017-08-30 14:16:01 +02004650 if ((cur != 0) && (!IS_CHAR(cur))) {
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004651 /* Generate the error and skip the offending character */
4652 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4653 "PCDATA invalid Char value %d\n",
4654 cur);
4655 NEXTL(l);
4656 }
Owen Taylor3473f882001-02-23 17:55:21 +00004657}
4658
4659/**
4660 * xmlParseExternalID:
4661 * @ctxt: an XML parser context
4662 * @publicID: a xmlChar** receiving PubidLiteral
4663 * @strict: indicate whether we should restrict parsing to only
4664 * production [75], see NOTE below
4665 *
4666 * Parse an External ID or a Public ID
4667 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004668 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004669 * 'PUBLIC' S PubidLiteral S SystemLiteral
4670 *
4671 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4672 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4673 *
4674 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4675 *
4676 * Returns the function returns SystemLiteral and in the second
4677 * case publicID receives PubidLiteral, is strict is off
4678 * it is possible to return NULL and have publicID set.
4679 */
4680
4681xmlChar *
4682xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4683 xmlChar *URI = NULL;
4684
4685 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004686
4687 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004688 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004689 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004690 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004691 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4692 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004693 }
Owen Taylor3473f882001-02-23 17:55:21 +00004694 URI = xmlParseSystemLiteral(ctxt);
4695 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004696 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004697 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004698 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004699 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004700 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004701 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004702 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004703 }
Owen Taylor3473f882001-02-23 17:55:21 +00004704 *publicID = xmlParsePubidLiteral(ctxt);
4705 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004706 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004707 }
4708 if (strict) {
4709 /*
4710 * We don't handle [83] so "S SystemLiteral" is required.
4711 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004712 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004713 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004714 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004715 }
4716 } else {
4717 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004718 * We handle [83] so we return immediately, if
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004719 * "S SystemLiteral" is not detected. We skip blanks if no
4720 * system literal was found, but this is harmless since we must
4721 * be at the end of a NotationDecl.
Owen Taylor3473f882001-02-23 17:55:21 +00004722 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004723 if (SKIP_BLANKS == 0) return(NULL);
4724 if ((CUR != '\'') && (CUR != '"')) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004725 }
Owen Taylor3473f882001-02-23 17:55:21 +00004726 URI = xmlParseSystemLiteral(ctxt);
4727 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004728 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004729 }
4730 }
4731 return(URI);
4732}
4733
4734/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004735 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004736 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004737 * @buf: the already parsed part of the buffer
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004738 * @len: number of bytes in the buffer
Daniel Veillard4c778d82005-01-23 17:37:44 +00004739 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004740 *
4741 * Skip an XML (SGML) comment <!-- .... -->
4742 * The spec says that "For compatibility, the string "--" (double-hyphen)
4743 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004744 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004745 *
4746 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4747 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004748static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004749xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4750 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004751 int q, ql;
4752 int r, rl;
4753 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004754 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004755 int inputid;
4756
4757 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004758
Owen Taylor3473f882001-02-23 17:55:21 +00004759 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004760 len = 0;
4761 size = XML_PARSER_BUFFER_SIZE;
4762 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4763 if (buf == NULL) {
4764 xmlErrMemory(ctxt, NULL);
4765 return;
4766 }
Owen Taylor3473f882001-02-23 17:55:21 +00004767 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004768 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004769 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004770 if (q == 0)
4771 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004772 if (!IS_CHAR(q)) {
4773 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4774 "xmlParseComment: invalid xmlChar value %d\n",
4775 q);
4776 xmlFree (buf);
4777 return;
4778 }
Owen Taylor3473f882001-02-23 17:55:21 +00004779 NEXTL(ql);
4780 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004781 if (r == 0)
4782 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004783 if (!IS_CHAR(r)) {
4784 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4785 "xmlParseComment: invalid xmlChar value %d\n",
4786 q);
4787 xmlFree (buf);
4788 return;
4789 }
Owen Taylor3473f882001-02-23 17:55:21 +00004790 NEXTL(rl);
4791 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004792 if (cur == 0)
4793 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004794 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004795 ((cur != '>') ||
4796 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004797 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004798 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004799 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004800 if ((len > XML_MAX_TEXT_LENGTH) &&
4801 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4802 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4803 "Comment too big found", NULL);
4804 xmlFree (buf);
4805 return;
4806 }
Owen Taylor3473f882001-02-23 17:55:21 +00004807 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004808 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004809 size_t new_size;
4810
4811 new_size = size * 2;
4812 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004813 if (new_buf == NULL) {
4814 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004815 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004816 return;
4817 }
William M. Bracka3215c72004-07-31 16:24:01 +00004818 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004819 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004820 }
4821 COPY_BUF(ql,buf,len,q);
4822 q = r;
4823 ql = rl;
4824 r = cur;
4825 rl = l;
4826
4827 count++;
4828 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08004829 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00004830 GROW;
4831 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004832 if (ctxt->instate == XML_PARSER_EOF) {
4833 xmlFree(buf);
4834 return;
4835 }
Owen Taylor3473f882001-02-23 17:55:21 +00004836 }
4837 NEXTL(l);
4838 cur = CUR_CHAR(l);
4839 if (cur == 0) {
4840 SHRINK;
4841 GROW;
4842 cur = CUR_CHAR(l);
4843 }
4844 }
4845 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004846 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004847 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004848 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004849 } else if (!IS_CHAR(cur)) {
4850 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4851 "xmlParseComment: invalid xmlChar value %d\n",
4852 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004853 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004854 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004855 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004856 "Comment doesn't start and stop in the same"
4857 " entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004858 }
4859 NEXT;
4860 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4861 (!ctxt->disableSAX))
4862 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004863 }
Daniel Veillardda629342007-08-01 07:49:06 +00004864 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004865 return;
4866not_terminated:
4867 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4868 "Comment not terminated\n", NULL);
4869 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004870 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004871}
Daniel Veillardda629342007-08-01 07:49:06 +00004872
Daniel Veillard4c778d82005-01-23 17:37:44 +00004873/**
4874 * xmlParseComment:
4875 * @ctxt: an XML parser context
4876 *
4877 * Skip an XML (SGML) comment <!-- .... -->
4878 * The spec says that "For compatibility, the string "--" (double-hyphen)
4879 * must not occur within comments. "
4880 *
4881 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4882 */
4883void
4884xmlParseComment(xmlParserCtxtPtr ctxt) {
4885 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004886 size_t size = XML_PARSER_BUFFER_SIZE;
4887 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004888 xmlParserInputState state;
4889 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004890 size_t nbchar = 0;
4891 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004892 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004893
4894 /*
4895 * Check that there is a comment right here.
4896 */
4897 if ((RAW != '<') || (NXT(1) != '!') ||
4898 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004899 state = ctxt->instate;
4900 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004901 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004902 SKIP(4);
4903 SHRINK;
4904 GROW;
4905
4906 /*
4907 * Accelerated common case where input don't need to be
4908 * modified before passing it to the handler.
4909 */
4910 in = ctxt->input->cur;
4911 do {
4912 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004913 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004914 ctxt->input->line++; ctxt->input->col = 1;
4915 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004916 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004917 }
4918get_more:
4919 ccol = ctxt->input->col;
4920 while (((*in > '-') && (*in <= 0x7F)) ||
4921 ((*in >= 0x20) && (*in < '-')) ||
4922 (*in == 0x09)) {
4923 in++;
4924 ccol++;
4925 }
4926 ctxt->input->col = ccol;
4927 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004928 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004929 ctxt->input->line++; ctxt->input->col = 1;
4930 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004931 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004932 goto get_more;
4933 }
4934 nbchar = in - ctxt->input->cur;
4935 /*
4936 * save current set of data
4937 */
4938 if (nbchar > 0) {
4939 if ((ctxt->sax != NULL) &&
4940 (ctxt->sax->comment != NULL)) {
4941 if (buf == NULL) {
4942 if ((*in == '-') && (in[1] == '-'))
4943 size = nbchar + 1;
4944 else
4945 size = XML_PARSER_BUFFER_SIZE + nbchar;
4946 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4947 if (buf == NULL) {
4948 xmlErrMemory(ctxt, NULL);
4949 ctxt->instate = state;
4950 return;
4951 }
4952 len = 0;
4953 } else if (len + nbchar + 1 >= size) {
4954 xmlChar *new_buf;
4955 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4956 new_buf = (xmlChar *) xmlRealloc(buf,
4957 size * sizeof(xmlChar));
4958 if (new_buf == NULL) {
4959 xmlFree (buf);
4960 xmlErrMemory(ctxt, NULL);
4961 ctxt->instate = state;
4962 return;
4963 }
4964 buf = new_buf;
4965 }
4966 memcpy(&buf[len], ctxt->input->cur, nbchar);
4967 len += nbchar;
4968 buf[len] = 0;
4969 }
4970 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004971 if ((len > XML_MAX_TEXT_LENGTH) &&
4972 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4973 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4974 "Comment too big found", NULL);
4975 xmlFree (buf);
4976 return;
4977 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004978 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004979 if (*in == 0xA) {
4980 in++;
4981 ctxt->input->line++; ctxt->input->col = 1;
4982 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004983 if (*in == 0xD) {
4984 in++;
4985 if (*in == 0xA) {
4986 ctxt->input->cur = in;
4987 in++;
4988 ctxt->input->line++; ctxt->input->col = 1;
Elliott Hughesecdab2a2022-02-23 14:33:50 -08004989 goto get_more;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004990 }
4991 in--;
4992 }
4993 SHRINK;
4994 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004995 if (ctxt->instate == XML_PARSER_EOF) {
4996 xmlFree(buf);
4997 return;
4998 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004999 in = ctxt->input->cur;
5000 if (*in == '-') {
5001 if (in[1] == '-') {
5002 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00005003 if (ctxt->input->id != inputid) {
5004 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005005 "comment doesn't start and stop in the"
5006 " same entity\n");
Daniel Veillard051d52c2008-07-29 16:44:59 +00005007 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005008 SKIP(3);
5009 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5010 (!ctxt->disableSAX)) {
5011 if (buf != NULL)
5012 ctxt->sax->comment(ctxt->userData, buf);
5013 else
5014 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5015 }
5016 if (buf != NULL)
5017 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08005018 if (ctxt->instate != XML_PARSER_EOF)
5019 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005020 return;
5021 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005022 if (buf != NULL) {
5023 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5024 "Double hyphen within comment: "
5025 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005026 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005027 } else
5028 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5029 "Double hyphen within comment\n", NULL);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07005030 if (ctxt->instate == XML_PARSER_EOF) {
5031 xmlFree(buf);
5032 return;
5033 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005034 in++;
5035 ctxt->input->col++;
5036 }
5037 in++;
5038 ctxt->input->col++;
5039 goto get_more;
5040 }
Haibo Huangd23e46c2020-10-28 22:26:09 -07005041 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
Daniel Veillard4c778d82005-01-23 17:37:44 +00005042 xmlParseCommentComplex(ctxt, buf, len, size);
5043 ctxt->instate = state;
5044 return;
5045}
5046
Owen Taylor3473f882001-02-23 17:55:21 +00005047
5048/**
5049 * xmlParsePITarget:
5050 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005051 *
Owen Taylor3473f882001-02-23 17:55:21 +00005052 * parse the name of a PI
5053 *
5054 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5055 *
5056 * Returns the PITarget name or NULL
5057 */
5058
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005059const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005060xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005061 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005062
5063 name = xmlParseName(ctxt);
5064 if ((name != NULL) &&
5065 ((name[0] == 'x') || (name[0] == 'X')) &&
5066 ((name[1] == 'm') || (name[1] == 'M')) &&
5067 ((name[2] == 'l') || (name[2] == 'L'))) {
5068 int i;
5069 if ((name[0] == 'x') && (name[1] == 'm') &&
5070 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005071 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005072 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005073 return(name);
5074 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005075 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005076 return(name);
5077 }
5078 for (i = 0;;i++) {
5079 if (xmlW3CPIs[i] == NULL) break;
5080 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5081 return(name);
5082 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005083 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5084 "xmlParsePITarget: invalid name prefix 'xml'\n",
5085 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005086 }
Daniel Veillard37334572008-07-31 08:20:02 +00005087 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005088 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005089 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005090 }
Owen Taylor3473f882001-02-23 17:55:21 +00005091 return(name);
5092}
5093
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The value must consist of the keyword "catalog", an '=' sign and a
 * single or double quoted URL, with optional blanks around each piece
 * and nothing but blanks after the closing quote. On success the URL is
 * added to ctxt->catalogs through xmlCatalogAddLocal(). On a syntax
 * error an XML_WAR_CATALOG_PI warning is emitted.
 *
 * NOTE(review): when the '=' is missing the function returns silently,
 * without the warning raised for the other syntax problems - confirm
 * whether this is intentional.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *cursor = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* keyword */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7) != 0)
        goto error;
    cursor += 7;

    /* '=' separator */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != '=')
        return;
    cursor++;

    /* opening quote, remembered so that the same one closes the value */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    quote = *cursor;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    cursor++;

    /* quoted URL */
    start = cursor;
    while ((*cursor != 0) && (*cursor != quote))
        cursor++;
    if (*cursor == 0)
        goto error;
    URL = xmlStrndup(start, cursor - start);
    cursor++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*cursor); cursor++)
        ;
    if (*cursor != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5155
Owen Taylor3473f882001-02-23 17:55:21 +00005156/**
5157 * xmlParsePI:
5158 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005159 *
Owen Taylor3473f882001-02-23 17:55:21 +00005160 * parse an XML Processing Instruction.
5161 *
5162 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5163 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07005164 * The processing is transferred to SAX once parsed.
Owen Taylor3473f882001-02-23 17:55:21 +00005165 */
5166
5167void
5168xmlParsePI(xmlParserCtxtPtr ctxt) {
5169 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005170 size_t len = 0;
5171 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005172 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005173 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005174 xmlParserInputState state;
5175 int count = 0;
5176
5177 if ((RAW == '<') && (NXT(1) == '?')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005178 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005179 state = ctxt->instate;
5180 ctxt->instate = XML_PARSER_PI;
5181 /*
5182 * this is a Processing Instruction.
5183 */
5184 SKIP(2);
5185 SHRINK;
5186
5187 /*
5188 * Parse the target name and check for special support like
5189 * namespace.
5190 */
5191 target = xmlParsePITarget(ctxt);
5192 if (target != NULL) {
5193 if ((RAW == '?') && (NXT(1) == '>')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005194 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005195 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005196 "PI declaration doesn't start and stop in"
5197 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005198 }
5199 SKIP(2);
5200
5201 /*
5202 * SAX: PI detected.
5203 */
5204 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5205 (ctxt->sax->processingInstruction != NULL))
5206 ctxt->sax->processingInstruction(ctxt->userData,
5207 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005208 if (ctxt->instate != XML_PARSER_EOF)
5209 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005210 return;
5211 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005212 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005213 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005214 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005215 ctxt->instate = state;
5216 return;
5217 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005218 if (SKIP_BLANKS == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005219 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5220 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005221 }
Owen Taylor3473f882001-02-23 17:55:21 +00005222 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005223 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005224 ((cur != '?') || (NXT(1) != '>'))) {
5225 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005226 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005227 size_t new_size = size * 2;
5228 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005229 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005230 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005231 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005232 ctxt->instate = state;
5233 return;
5234 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005235 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005236 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005237 }
5238 count++;
5239 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08005240 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00005241 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005242 if (ctxt->instate == XML_PARSER_EOF) {
5243 xmlFree(buf);
5244 return;
5245 }
Owen Taylor3473f882001-02-23 17:55:21 +00005246 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005247 if ((len > XML_MAX_TEXT_LENGTH) &&
5248 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5249 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5250 "PI %s too big found", target);
5251 xmlFree(buf);
5252 ctxt->instate = state;
5253 return;
5254 }
Owen Taylor3473f882001-02-23 17:55:21 +00005255 }
5256 COPY_BUF(l,buf,len,cur);
5257 NEXTL(l);
5258 cur = CUR_CHAR(l);
5259 if (cur == 0) {
5260 SHRINK;
5261 GROW;
5262 cur = CUR_CHAR(l);
5263 }
5264 }
Daniel Veillard51304812012-07-19 20:34:26 +08005265 if ((len > XML_MAX_TEXT_LENGTH) &&
5266 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5267 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5268 "PI %s too big found", target);
5269 xmlFree(buf);
5270 ctxt->instate = state;
5271 return;
5272 }
Owen Taylor3473f882001-02-23 17:55:21 +00005273 buf[len] = 0;
5274 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005275 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5276 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005277 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005278 if (inputid != ctxt->input->id) {
5279 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5280 "PI declaration doesn't start and stop in"
5281 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005282 }
5283 SKIP(2);
5284
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005285#ifdef LIBXML_CATALOG_ENABLED
5286 if (((state == XML_PARSER_MISC) ||
5287 (state == XML_PARSER_START)) &&
5288 (xmlStrEqual(target, XML_CATALOG_PI))) {
5289 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5290 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5291 (allow == XML_CATA_ALLOW_ALL))
5292 xmlParseCatalogPI(ctxt, buf);
5293 }
5294#endif
5295
5296
Owen Taylor3473f882001-02-23 17:55:21 +00005297 /*
5298 * SAX: PI detected.
5299 */
5300 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5301 (ctxt->sax->processingInstruction != NULL))
5302 ctxt->sax->processingInstruction(ctxt->userData,
5303 target, buf);
5304 }
5305 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005306 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005307 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005308 }
Chris Evans77404b82011-12-14 16:18:25 +08005309 if (ctxt->instate != XML_PARSER_EOF)
5310 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005311 }
5312}
5313
5314/**
5315 * xmlParseNotationDecl:
5316 * @ctxt: an XML parser context
5317 *
5318 * parse a notation declaration
5319 *
5320 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5321 *
5322 * Hence there is actually 3 choices:
5323 * 'PUBLIC' S PubidLiteral
5324 * 'PUBLIC' S PubidLiteral S SystemLiteral
5325 * and 'SYSTEM' S SystemLiteral
5326 *
5327 * See the NOTE on xmlParseExternalID().
5328 */
5329
5330void
5331xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005332 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005333 xmlChar *Pubid;
5334 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005335
Daniel Veillarda07050d2003-10-19 14:46:32 +00005336 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005337 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005338 SHRINK;
5339 SKIP(10);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005340 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005341 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5342 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005343 return;
5344 }
Owen Taylor3473f882001-02-23 17:55:21 +00005345
Daniel Veillard76d66f42001-05-16 21:05:17 +00005346 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005347 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005348 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005349 return;
5350 }
Daniel Veillard37334572008-07-31 08:20:02 +00005351 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005352 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005353 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005354 name, NULL, NULL);
5355 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005356 if (SKIP_BLANKS == 0) {
5357 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5358 "Space required after the NOTATION name'\n");
5359 return;
5360 }
Owen Taylor3473f882001-02-23 17:55:21 +00005361
5362 /*
5363 * Parse the IDs.
5364 */
5365 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5366 SKIP_BLANKS;
5367
5368 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005369 if (inputid != ctxt->input->id) {
5370 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5371 "Notation declaration doesn't start and stop"
5372 " in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005373 }
5374 NEXT;
5375 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5376 (ctxt->sax->notationDecl != NULL))
5377 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5378 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005379 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005380 }
Owen Taylor3473f882001-02-23 17:55:21 +00005381 if (Systemid != NULL) xmlFree(Systemid);
5382 if (Pubid != NULL) xmlFree(Pubid);
5383 }
5384}
5385
5386/**
5387 * xmlParseEntityDecl:
5388 * @ctxt: an XML parser context
5389 *
5390 * parse <!ENTITY declarations
5391 *
5392 * [70] EntityDecl ::= GEDecl | PEDecl
5393 *
5394 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5395 *
5396 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5397 *
5398 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5399 *
5400 * [74] PEDef ::= EntityValue | ExternalID
5401 *
5402 * [76] NDataDecl ::= S 'NDATA' S Name
5403 *
5404 * [ VC: Notation Declared ]
5405 * The Name must match the declared name of a notation.
5406 */
5407
5408void
5409xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005410 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005411 xmlChar *value = NULL;
5412 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005413 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005414 int isParameter = 0;
5415 xmlChar *orig = NULL;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005416
Daniel Veillard4c778d82005-01-23 17:37:44 +00005417 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005418 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005419 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005420 SHRINK;
5421 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005422 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005423 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5424 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005425 }
Owen Taylor3473f882001-02-23 17:55:21 +00005426
5427 if (RAW == '%') {
5428 NEXT;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005429 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005430 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
David Kilzer4472c3a2016-05-13 15:13:17 +08005431 "Space required after '%%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005432 }
Owen Taylor3473f882001-02-23 17:55:21 +00005433 isParameter = 1;
5434 }
5435
Daniel Veillard76d66f42001-05-16 21:05:17 +00005436 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005437 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005438 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5439 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005440 return;
5441 }
Daniel Veillard37334572008-07-31 08:20:02 +00005442 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005443 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005444 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005445 name, NULL, NULL);
5446 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005447 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005448 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5449 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005450 }
Owen Taylor3473f882001-02-23 17:55:21 +00005451
Daniel Veillardf5582f12002-06-11 10:08:16 +00005452 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005453 /*
5454 * handle the various case of definitions...
5455 */
5456 if (isParameter) {
5457 if ((RAW == '"') || (RAW == '\'')) {
5458 value = xmlParseEntityValue(ctxt, &orig);
5459 if (value) {
5460 if ((ctxt->sax != NULL) &&
5461 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5462 ctxt->sax->entityDecl(ctxt->userData, name,
5463 XML_INTERNAL_PARAMETER_ENTITY,
5464 NULL, NULL, value);
5465 }
5466 } else {
5467 URI = xmlParseExternalID(ctxt, &literal, 1);
5468 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005469 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005470 }
5471 if (URI) {
5472 xmlURIPtr uri;
5473
5474 uri = xmlParseURI((const char *) URI);
5475 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005476 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5477 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005478 /*
5479 * This really ought to be a well formedness error
5480 * but the XML Core WG decided otherwise c.f. issue
5481 * E26 of the XML erratas.
5482 */
Owen Taylor3473f882001-02-23 17:55:21 +00005483 } else {
5484 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005485 /*
5486 * Okay this is foolish to block those but not
5487 * invalid URIs.
5488 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005489 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005490 } else {
5491 if ((ctxt->sax != NULL) &&
5492 (!ctxt->disableSAX) &&
5493 (ctxt->sax->entityDecl != NULL))
5494 ctxt->sax->entityDecl(ctxt->userData, name,
5495 XML_EXTERNAL_PARAMETER_ENTITY,
5496 literal, URI, NULL);
5497 }
5498 xmlFreeURI(uri);
5499 }
5500 }
5501 }
5502 } else {
5503 if ((RAW == '"') || (RAW == '\'')) {
5504 value = xmlParseEntityValue(ctxt, &orig);
5505 if ((ctxt->sax != NULL) &&
5506 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5507 ctxt->sax->entityDecl(ctxt->userData, name,
5508 XML_INTERNAL_GENERAL_ENTITY,
5509 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005510 /*
5511 * For expat compatibility in SAX mode.
5512 */
5513 if ((ctxt->myDoc == NULL) ||
5514 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5515 if (ctxt->myDoc == NULL) {
5516 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005517 if (ctxt->myDoc == NULL) {
5518 xmlErrMemory(ctxt, "New Doc failed");
5519 return;
5520 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005521 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005522 }
5523 if (ctxt->myDoc->intSubset == NULL)
5524 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5525 BAD_CAST "fake", NULL, NULL);
5526
Daniel Veillard1af9a412003-08-20 22:54:39 +00005527 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5528 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005529 }
Owen Taylor3473f882001-02-23 17:55:21 +00005530 } else {
5531 URI = xmlParseExternalID(ctxt, &literal, 1);
5532 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005533 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005534 }
5535 if (URI) {
5536 xmlURIPtr uri;
5537
5538 uri = xmlParseURI((const char *)URI);
5539 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005540 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5541 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005542 /*
5543 * This really ought to be a well formedness error
5544 * but the XML Core WG decided otherwise c.f. issue
5545 * E26 of the XML erratas.
5546 */
Owen Taylor3473f882001-02-23 17:55:21 +00005547 } else {
5548 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005549 /*
5550 * Okay this is foolish to block those but not
5551 * invalid URIs.
5552 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005553 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005554 }
5555 xmlFreeURI(uri);
5556 }
5557 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005558 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5560 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005561 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005562 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005563 SKIP(5);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005564 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005565 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5566 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005567 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005568 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005569 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5570 (ctxt->sax->unparsedEntityDecl != NULL))
5571 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5572 literal, URI, ndata);
5573 } else {
5574 if ((ctxt->sax != NULL) &&
5575 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5576 ctxt->sax->entityDecl(ctxt->userData, name,
5577 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5578 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005579 /*
5580 * For expat compatibility in SAX mode.
Haibo Huangcfd91dc2020-07-30 23:01:33 -07005581 * assuming the entity replacement was asked for
Daniel Veillard5997aca2002-03-18 18:36:20 +00005582 */
5583 if ((ctxt->replaceEntities != 0) &&
5584 ((ctxt->myDoc == NULL) ||
5585 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5586 if (ctxt->myDoc == NULL) {
5587 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005588 if (ctxt->myDoc == NULL) {
5589 xmlErrMemory(ctxt, "New Doc failed");
5590 return;
5591 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005592 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005593 }
5594
5595 if (ctxt->myDoc->intSubset == NULL)
5596 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5597 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005598 xmlSAX2EntityDecl(ctxt, name,
5599 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5600 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005601 }
Owen Taylor3473f882001-02-23 17:55:21 +00005602 }
5603 }
5604 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005605 if (ctxt->instate == XML_PARSER_EOF)
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005606 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00005607 SKIP_BLANKS;
5608 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005609 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005610 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005611 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005612 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005613 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005614 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005615 "Entity declaration doesn't start and stop in"
5616 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005617 }
5618 NEXT;
5619 }
5620 if (orig != NULL) {
5621 /*
5622 * Ugly mechanism to save the raw entity value.
5623 */
5624 xmlEntityPtr cur = NULL;
5625
5626 if (isParameter) {
5627 if ((ctxt->sax != NULL) &&
5628 (ctxt->sax->getParameterEntity != NULL))
5629 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5630 } else {
5631 if ((ctxt->sax != NULL) &&
5632 (ctxt->sax->getEntity != NULL))
5633 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005634 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005635 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005636 }
Owen Taylor3473f882001-02-23 17:55:21 +00005637 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005638 if ((cur != NULL) && (cur->orig == NULL)) {
5639 cur->orig = orig;
5640 orig = NULL;
5641 }
Owen Taylor3473f882001-02-23 17:55:21 +00005642 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005643
5644done:
Owen Taylor3473f882001-02-23 17:55:21 +00005645 if (value != NULL) xmlFree(value);
5646 if (URI != NULL) xmlFree(URI);
5647 if (literal != NULL) xmlFree(literal);
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005648 if (orig != NULL) xmlFree(orig);
Owen Taylor3473f882001-02-23 17:55:21 +00005649 }
5650}
5651
5652/**
5653 * xmlParseDefaultDecl:
5654 * @ctxt: an XML parser context
5655 * @value: Receive a possible fixed default value for the attribute
5656 *
5657 * Parse an attribute default declaration
5658 *
5659 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5660 *
5661 * [ VC: Required Attribute ]
5662 * if the default declaration is the keyword #REQUIRED, then the
5663 * attribute must be specified for all elements of the type in the
5664 * attribute-list declaration.
5665 *
5666 * [ VC: Attribute Default Legal ]
5667 * The declared default value must meet the lexical constraints of
5668 * the declared attribute type c.f. xmlValidateAttributeDecl()
5669 *
5670 * [ VC: Fixed Attribute Default ]
5671 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005672 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005673 *
5674 * [ WFC: No < in Attribute Values ]
5675 * handled in xmlParseAttValue()
5676 *
5677 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005678 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005679 */
5680
5681int
5682xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5683 int val;
5684 xmlChar *ret;
5685
5686 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005687 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005688 SKIP(9);
5689 return(XML_ATTRIBUTE_REQUIRED);
5690 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005691 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005692 SKIP(8);
5693 return(XML_ATTRIBUTE_IMPLIED);
5694 }
5695 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005696 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005697 SKIP(6);
5698 val = XML_ATTRIBUTE_FIXED;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005699 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005700 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5701 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005702 }
Owen Taylor3473f882001-02-23 17:55:21 +00005703 }
5704 ret = xmlParseAttValue(ctxt);
5705 ctxt->instate = XML_PARSER_DTD;
5706 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005707 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005708 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005709 } else
5710 *value = ret;
5711 return(val);
5712}
5713
5714/**
5715 * xmlParseNotationType:
5716 * @ctxt: an XML parser context
5717 *
5718 * parse an Notation attribute type.
5719 *
5720 * Note: the leading 'NOTATION' S part has already being parsed...
5721 *
5722 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5723 *
5724 * [ VC: Notation Attributes ]
5725 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005726 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005727 *
5728 * Returns: the notation attribute tree built while parsing
5729 */
5730
5731xmlEnumerationPtr
5732xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005733 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005734 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005735
5736 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005737 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005738 return(NULL);
5739 }
5740 SHRINK;
5741 do {
5742 NEXT;
5743 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005744 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005745 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005746 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5747 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005748 xmlFreeEnumeration(ret);
5749 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005750 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005751 tmp = ret;
5752 while (tmp != NULL) {
5753 if (xmlStrEqual(name, tmp->name)) {
5754 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5755 "standalone: attribute notation value token %s duplicated\n",
5756 name, NULL);
5757 if (!xmlDictOwns(ctxt->dict, name))
5758 xmlFree((xmlChar *) name);
5759 break;
5760 }
5761 tmp = tmp->next;
5762 }
5763 if (tmp == NULL) {
5764 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005765 if (cur == NULL) {
5766 xmlFreeEnumeration(ret);
5767 return(NULL);
5768 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005769 if (last == NULL) ret = last = cur;
5770 else {
5771 last->next = cur;
5772 last = cur;
5773 }
Owen Taylor3473f882001-02-23 17:55:21 +00005774 }
5775 SKIP_BLANKS;
5776 } while (RAW == '|');
5777 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005778 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005779 xmlFreeEnumeration(ret);
5780 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005781 }
5782 NEXT;
5783 return(ret);
5784}
5785
5786/**
5787 * xmlParseEnumerationType:
5788 * @ctxt: an XML parser context
5789 *
5790 * parse an Enumeration attribute type.
5791 *
5792 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5793 *
5794 * [ VC: Enumeration ]
5795 * Values of this type must match one of the Nmtoken tokens in
5796 * the declaration
5797 *
5798 * Returns: the enumeration attribute tree built while parsing
5799 */
5800
5801xmlEnumerationPtr
5802xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5803 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005804 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005805
5806 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005807 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005808 return(NULL);
5809 }
5810 SHRINK;
5811 do {
5812 NEXT;
5813 SKIP_BLANKS;
5814 name = xmlParseNmtoken(ctxt);
5815 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005816 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005817 return(ret);
5818 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005819 tmp = ret;
5820 while (tmp != NULL) {
5821 if (xmlStrEqual(name, tmp->name)) {
5822 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5823 "standalone: attribute enumeration value token %s duplicated\n",
5824 name, NULL);
5825 if (!xmlDictOwns(ctxt->dict, name))
5826 xmlFree(name);
5827 break;
5828 }
5829 tmp = tmp->next;
5830 }
5831 if (tmp == NULL) {
5832 cur = xmlCreateEnumeration(name);
5833 if (!xmlDictOwns(ctxt->dict, name))
5834 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005835 if (cur == NULL) {
5836 xmlFreeEnumeration(ret);
5837 return(NULL);
5838 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005839 if (last == NULL) ret = last = cur;
5840 else {
5841 last->next = cur;
5842 last = cur;
5843 }
Owen Taylor3473f882001-02-23 17:55:21 +00005844 }
5845 SKIP_BLANKS;
5846 } while (RAW == '|');
5847 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005848 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005849 return(ret);
5850 }
5851 NEXT;
5852 return(ret);
5853}
5854
5855/**
5856 * xmlParseEnumeratedType:
5857 * @ctxt: an XML parser context
5858 * @tree: the enumeration tree built while parsing
5859 *
5860 * parse an Enumerated attribute type.
5861 *
5862 * [57] EnumeratedType ::= NotationType | Enumeration
5863 *
5864 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5865 *
5866 *
5867 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5868 */
5869
5870int
5871xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005872 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005873 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005874 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005875 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5876 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005877 return(0);
5878 }
Owen Taylor3473f882001-02-23 17:55:21 +00005879 *tree = xmlParseNotationType(ctxt);
5880 if (*tree == NULL) return(0);
5881 return(XML_ATTRIBUTE_NOTATION);
5882 }
5883 *tree = xmlParseEnumerationType(ctxt);
5884 if (*tree == NULL) return(0);
5885 return(XML_ATTRIBUTE_ENUMERATION);
5886}
5887
5888/**
5889 * xmlParseAttributeType:
5890 * @ctxt: an XML parser context
5891 * @tree: the enumeration tree built while parsing
5892 *
5893 * parse the Attribute list def for an element
5894 *
5895 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5896 *
5897 * [55] StringType ::= 'CDATA'
5898 *
5899 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5900 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5901 *
5902 * Validity constraints for attribute values syntax are checked in
5903 * xmlValidateAttributeValue()
5904 *
5905 * [ VC: ID ]
5906 * Values of type ID must match the Name production. A name must not
5907 * appear more than once in an XML document as a value of this type;
5908 * i.e., ID values must uniquely identify the elements which bear them.
5909 *
5910 * [ VC: One ID per Element Type ]
5911 * No element type may have more than one ID attribute specified.
5912 *
5913 * [ VC: ID Attribute Default ]
5914 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5915 *
5916 * [ VC: IDREF ]
5917 * Values of type IDREF must match the Name production, and values
5918 * of type IDREFS must match Names; each IDREF Name must match the value
5919 * of an ID attribute on some element in the XML document; i.e. IDREF
5920 * values must match the value of some ID attribute.
5921 *
5922 * [ VC: Entity Name ]
5923 * Values of type ENTITY must match the Name production, values
5924 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005925 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005926 *
5927 * [ VC: Name Token ]
5928 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005929 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005930 *
5931 * Returns the attribute type
5932 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005933int
Owen Taylor3473f882001-02-23 17:55:21 +00005934xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5935 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005936 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005937 SKIP(5);
5938 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005939 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005940 SKIP(6);
5941 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005942 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005943 SKIP(5);
5944 return(XML_ATTRIBUTE_IDREF);
5945 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5946 SKIP(2);
5947 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005948 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005949 SKIP(6);
5950 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005951 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005952 SKIP(8);
5953 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005954 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005955 SKIP(8);
5956 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005957 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005958 SKIP(7);
5959 return(XML_ATTRIBUTE_NMTOKEN);
5960 }
5961 return(xmlParseEnumeratedType(ctxt, tree));
5962}
5963
5964/**
5965 * xmlParseAttributeListDecl:
5966 * @ctxt: an XML parser context
5967 *
5968 * : parse the Attribute list def for an element
5969 *
5970 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5971 *
5972 * [53] AttDef ::= S Name S AttType S DefaultDecl
5973 *
5974 */
5975void
5976xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005977 const xmlChar *elemName;
5978 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005979 xmlEnumerationPtr tree;
5980
Daniel Veillarda07050d2003-10-19 14:46:32 +00005981 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005982 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005983
5984 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005985 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005986 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005987 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005988 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005989 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005990 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005991 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5992 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005993 return;
5994 }
5995 SKIP_BLANKS;
5996 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005997 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005998 int type;
5999 int def;
6000 xmlChar *defaultValue = NULL;
6001
6002 GROW;
6003 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006004 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006005 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006006 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6007 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006008 break;
6009 }
6010 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006011 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006012 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006013 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006014 break;
6015 }
Owen Taylor3473f882001-02-23 17:55:21 +00006016
6017 type = xmlParseAttributeType(ctxt, &tree);
6018 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006019 break;
6020 }
6021
6022 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006023 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006024 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6025 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006026 if (tree != NULL)
6027 xmlFreeEnumeration(tree);
6028 break;
6029 }
Owen Taylor3473f882001-02-23 17:55:21 +00006030
6031 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6032 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006033 if (defaultValue != NULL)
6034 xmlFree(defaultValue);
6035 if (tree != NULL)
6036 xmlFreeEnumeration(tree);
6037 break;
6038 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006039 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6040 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006041
6042 GROW;
6043 if (RAW != '>') {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006044 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006045 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006046 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006047 if (defaultValue != NULL)
6048 xmlFree(defaultValue);
6049 if (tree != NULL)
6050 xmlFreeEnumeration(tree);
6051 break;
6052 }
Owen Taylor3473f882001-02-23 17:55:21 +00006053 }
Owen Taylor3473f882001-02-23 17:55:21 +00006054 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6055 (ctxt->sax->attributeDecl != NULL))
6056 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6057 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006058 else if (tree != NULL)
6059 xmlFreeEnumeration(tree);
6060
6061 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006062 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006063 (def != XML_ATTRIBUTE_REQUIRED)) {
6064 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6065 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006066 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006067 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6068 }
Owen Taylor3473f882001-02-23 17:55:21 +00006069 if (defaultValue != NULL)
6070 xmlFree(defaultValue);
6071 GROW;
6072 }
6073 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006074 if (inputid != ctxt->input->id) {
6075 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6076 "Attribute list declaration doesn't start and"
6077 " stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006078 }
6079 NEXT;
6080 }
Owen Taylor3473f882001-02-23 17:55:21 +00006081 }
6082}
6083
6084/**
6085 * xmlParseElementMixedContentDecl:
6086 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006087 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006088 *
6089 * parse the declaration for a Mixed Element content
6090 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006091 *
Owen Taylor3473f882001-02-23 17:55:21 +00006092 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6093 * '(' S? '#PCDATA' S? ')'
6094 *
6095 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6096 *
6097 * [ VC: No Duplicate Types ]
6098 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006099 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006100 *
6101 * returns: the list of the xmlElementContentPtr describing the element choices
6102 */
6103xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006104xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006105 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006106 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006107
6108 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006109 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006110 SKIP(7);
6111 SKIP_BLANKS;
6112 SHRINK;
6113 if (RAW == ')') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006114 if (ctxt->input->id != inputchk) {
6115 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6116 "Element content declaration doesn't start and"
6117 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006118 }
Owen Taylor3473f882001-02-23 17:55:21 +00006119 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006120 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006121 if (ret == NULL)
6122 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006123 if (RAW == '*') {
6124 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6125 NEXT;
6126 }
6127 return(ret);
6128 }
6129 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006130 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006131 if (ret == NULL) return(NULL);
6132 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006133 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006134 NEXT;
6135 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006136 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Haibo Huangd75f3892021-01-05 21:34:50 -08006137 if (ret == NULL) {
6138 xmlFreeDocElementContent(ctxt->myDoc, cur);
6139 return(NULL);
6140 }
Owen Taylor3473f882001-02-23 17:55:21 +00006141 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006142 if (cur != NULL)
6143 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006144 cur = ret;
6145 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006146 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Haibo Huangd75f3892021-01-05 21:34:50 -08006147 if (n == NULL) {
6148 xmlFreeDocElementContent(ctxt->myDoc, ret);
6149 return(NULL);
6150 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006151 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006152 if (n->c1 != NULL)
6153 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006154 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006155 if (n != NULL)
6156 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006157 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006158 }
6159 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006160 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006161 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006162 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006163 "xmlParseElementMixedContentDecl : Name expected\n");
Nick Wellnhofer8627e4e2017-05-23 18:11:08 +02006164 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006165 return(NULL);
6166 }
6167 SKIP_BLANKS;
6168 GROW;
6169 }
6170 if ((RAW == ')') && (NXT(1) == '*')) {
6171 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006172 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006173 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006174 if (cur->c2 != NULL)
6175 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006176 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006177 if (ret != NULL)
6178 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006179 if (ctxt->input->id != inputchk) {
6180 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6181 "Element content declaration doesn't start and"
6182 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006183 }
Owen Taylor3473f882001-02-23 17:55:21 +00006184 SKIP(2);
6185 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006186 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006187 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006188 return(NULL);
6189 }
6190
6191 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006192 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006193 }
6194 return(ret);
6195}
6196
6197/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006198 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006199 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006200 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006201 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006202 *
6203 * parse the declaration for a Mixed Element content
6204 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006205 *
Owen Taylor3473f882001-02-23 17:55:21 +00006206 *
6207 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6208 *
6209 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6210 *
6211 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6212 *
6213 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6214 *
6215 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6216 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006217 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006218 * opening or closing parentheses in a choice, seq, or Mixed
6219 * construct is contained in the replacement text for a parameter
6220 * entity, both must be contained in the same replacement text. For
6221 * interoperability, if a parameter-entity reference appears in a
6222 * choice, seq, or Mixed construct, its replacement text should not
6223 * be empty, and neither the first nor last non-blank character of
6224 * the replacement text should be a connector (| or ,).
6225 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006226 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006227 * hierarchy.
6228 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006229static xmlElementContentPtr
6230xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6231 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006232 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006233 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006234 xmlChar type = 0;
6235
Daniel Veillard489f9672009-08-10 16:49:30 +02006236 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6237 (depth > 2048)) {
6238 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6239"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6240 depth);
6241 return(NULL);
6242 }
Owen Taylor3473f882001-02-23 17:55:21 +00006243 SKIP_BLANKS;
6244 GROW;
6245 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006246 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006247
Owen Taylor3473f882001-02-23 17:55:21 +00006248 /* Recurse on first child */
6249 NEXT;
6250 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006251 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6252 depth + 1);
Elliott Hughes5cefca72021-05-06 13:23:15 -07006253 if (cur == NULL)
6254 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006255 SKIP_BLANKS;
6256 GROW;
6257 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006258 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006259 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006260 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006261 return(NULL);
6262 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006263 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006264 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006265 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006266 return(NULL);
6267 }
Owen Taylor3473f882001-02-23 17:55:21 +00006268 GROW;
6269 if (RAW == '?') {
6270 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6271 NEXT;
6272 } else if (RAW == '*') {
6273 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6274 NEXT;
6275 } else if (RAW == '+') {
6276 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6277 NEXT;
6278 } else {
6279 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6280 }
Owen Taylor3473f882001-02-23 17:55:21 +00006281 GROW;
6282 }
6283 SKIP_BLANKS;
6284 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006285 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006286 /*
6287 * Each loop we parse one separator and one element.
6288 */
6289 if (RAW == ',') {
6290 if (type == 0) type = CUR;
6291
6292 /*
6293 * Detect "Name | Name , Name" error
6294 */
6295 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006296 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006297 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006298 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006299 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006300 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006301 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006302 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006303 return(NULL);
6304 }
6305 NEXT;
6306
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006307 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006308 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006309 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006310 xmlFreeDocElementContent(ctxt->myDoc, last);
6311 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006312 return(NULL);
6313 }
6314 if (last == NULL) {
6315 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006316 if (ret != NULL)
6317 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006318 ret = cur = op;
6319 } else {
6320 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006321 if (op != NULL)
6322 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006323 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006324 if (last != NULL)
6325 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006326 cur =op;
6327 last = NULL;
6328 }
6329 } else if (RAW == '|') {
6330 if (type == 0) type = CUR;
6331
6332 /*
6333 * Detect "Name , Name | Name" error
6334 */
6335 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006336 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006337 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006338 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006339 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006340 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006341 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006342 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006343 return(NULL);
6344 }
6345 NEXT;
6346
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006347 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006348 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006349 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006350 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006351 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006352 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006353 return(NULL);
6354 }
6355 if (last == NULL) {
6356 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006357 if (ret != NULL)
6358 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006359 ret = cur = op;
6360 } else {
6361 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006362 if (op != NULL)
6363 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006364 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006365 if (last != NULL)
6366 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006367 cur =op;
6368 last = NULL;
6369 }
6370 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006371 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006372 if ((last != NULL) && (last != ret))
6373 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006374 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006375 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006376 return(NULL);
6377 }
6378 GROW;
6379 SKIP_BLANKS;
6380 GROW;
6381 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006382 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006383 /* Recurse on second child */
6384 NEXT;
6385 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006386 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6387 depth + 1);
Elliott Hughes5cefca72021-05-06 13:23:15 -07006388 if (last == NULL) {
6389 if (ret != NULL)
6390 xmlFreeDocElementContent(ctxt->myDoc, ret);
6391 return(NULL);
6392 }
Owen Taylor3473f882001-02-23 17:55:21 +00006393 SKIP_BLANKS;
6394 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006395 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006396 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006397 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006398 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006399 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006400 return(NULL);
6401 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006402 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006403 if (last == NULL) {
6404 if (ret != NULL)
6405 xmlFreeDocElementContent(ctxt->myDoc, ret);
6406 return(NULL);
6407 }
Owen Taylor3473f882001-02-23 17:55:21 +00006408 if (RAW == '?') {
6409 last->ocur = XML_ELEMENT_CONTENT_OPT;
6410 NEXT;
6411 } else if (RAW == '*') {
6412 last->ocur = XML_ELEMENT_CONTENT_MULT;
6413 NEXT;
6414 } else if (RAW == '+') {
6415 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6416 NEXT;
6417 } else {
6418 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6419 }
6420 }
6421 SKIP_BLANKS;
6422 GROW;
6423 }
6424 if ((cur != NULL) && (last != NULL)) {
6425 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006426 if (last != NULL)
6427 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006428 }
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006429 if (ctxt->input->id != inputchk) {
6430 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6431 "Element content declaration doesn't start and stop in"
6432 " the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006433 }
Owen Taylor3473f882001-02-23 17:55:21 +00006434 NEXT;
6435 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006436 if (ret != NULL) {
6437 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6438 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6439 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6440 else
6441 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6442 }
Owen Taylor3473f882001-02-23 17:55:21 +00006443 NEXT;
6444 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006445 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006446 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006447 cur = ret;
6448 /*
6449 * Some normalization:
6450 * (a | b* | c?)* == (a | b | c)*
6451 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006452 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006453 if ((cur->c1 != NULL) &&
6454 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6455 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6456 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6457 if ((cur->c2 != NULL) &&
6458 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6459 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6460 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6461 cur = cur->c2;
6462 }
6463 }
Owen Taylor3473f882001-02-23 17:55:21 +00006464 NEXT;
6465 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006466 if (ret != NULL) {
6467 int found = 0;
6468
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006469 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6470 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6471 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006472 else
6473 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006474 /*
6475 * Some normalization:
6476 * (a | b*)+ == (a | b)*
6477 * (a | b?)+ == (a | b)*
6478 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006479 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006480 if ((cur->c1 != NULL) &&
6481 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6482 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6483 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6484 found = 1;
6485 }
6486 if ((cur->c2 != NULL) &&
6487 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6488 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6489 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6490 found = 1;
6491 }
6492 cur = cur->c2;
6493 }
6494 if (found)
6495 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6496 }
Owen Taylor3473f882001-02-23 17:55:21 +00006497 NEXT;
6498 }
6499 return(ret);
6500}
6501
6502/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006503 * xmlParseElementChildrenContentDecl:
6504 * @ctxt: an XML parser context
6505 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006506 *
6507 * parse the declaration for a Mixed Element content
6508 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6509 *
6510 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6511 *
6512 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6513 *
6514 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6515 *
6516 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6517 *
6518 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6519 * TODO Parameter-entity replacement text must be properly nested
6520 * with parenthesized groups. That is to say, if either of the
6521 * opening or closing parentheses in a choice, seq, or Mixed
6522 * construct is contained in the replacement text for a parameter
6523 * entity, both must be contained in the same replacement text. For
6524 * interoperability, if a parameter-entity reference appears in a
6525 * choice, seq, or Mixed construct, its replacement text should not
6526 * be empty, and neither the first nor last non-blank character of
6527 * the replacement text should be a connector (| or ,).
6528 *
6529 * Returns the tree of xmlElementContentPtr describing the element
6530 * hierarchy.
6531 */
6532xmlElementContentPtr
6533xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6534 /* stub left for API/ABI compat */
6535 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6536}
6537
6538/**
Owen Taylor3473f882001-02-23 17:55:21 +00006539 * xmlParseElementContentDecl:
6540 * @ctxt: an XML parser context
6541 * @name: the name of the element being defined.
6542 * @result: the Element Content pointer will be stored here if any
6543 *
6544 * parse the declaration for an Element content either Mixed or Children,
6545 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006546 *
Owen Taylor3473f882001-02-23 17:55:21 +00006547 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6548 *
6549 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6550 */
6551
6552int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006553xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006554 xmlElementContentPtr *result) {
6555
6556 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006557 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006558 int res;
6559
6560 *result = NULL;
6561
6562 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006563 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006564 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006565 return(-1);
6566 }
6567 NEXT;
6568 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006569 if (ctxt->instate == XML_PARSER_EOF)
6570 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006571 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006572 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006573 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006574 res = XML_ELEMENT_TYPE_MIXED;
6575 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006576 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006577 res = XML_ELEMENT_TYPE_ELEMENT;
6578 }
Owen Taylor3473f882001-02-23 17:55:21 +00006579 SKIP_BLANKS;
6580 *result = tree;
6581 return(res);
6582}
6583
6584/**
6585 * xmlParseElementDecl:
6586 * @ctxt: an XML parser context
6587 *
6588 * parse an Element declaration.
6589 *
6590 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6591 *
6592 * [ VC: Unique Element Type Declaration ]
6593 * No element type may be declared more than once
6594 *
6595 * Returns the type of the element, or -1 in case of error
6596 */
6597int
6598xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006599 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006600 int ret = -1;
6601 xmlElementContentPtr content = NULL;
6602
Daniel Veillard4c778d82005-01-23 17:37:44 +00006603 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006604 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006605 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006606
6607 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006608 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006609 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6610 "Space required after 'ELEMENT'\n");
David Kilzer00906752016-01-26 16:57:03 -08006611 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006612 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00006613 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006614 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006615 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6616 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006617 return(-1);
6618 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006619 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006620 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6621 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006622 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00006623 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006624 SKIP(5);
6625 /*
6626 * Element must always be empty.
6627 */
6628 ret = XML_ELEMENT_TYPE_EMPTY;
6629 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6630 (NXT(2) == 'Y')) {
6631 SKIP(3);
6632 /*
6633 * Element is a generic container.
6634 */
6635 ret = XML_ELEMENT_TYPE_ANY;
6636 } else if (RAW == '(') {
6637 ret = xmlParseElementContentDecl(ctxt, name, &content);
6638 } else {
6639 /*
6640 * [ WFC: PEs in Internal Subset ] error handling.
6641 */
6642 if ((RAW == '%') && (ctxt->external == 0) &&
6643 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006644 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006645 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006646 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006647 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006648 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6649 }
Owen Taylor3473f882001-02-23 17:55:21 +00006650 return(-1);
6651 }
6652
6653 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006654
6655 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006656 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006657 if (content != NULL) {
6658 xmlFreeDocElementContent(ctxt->myDoc, content);
6659 }
Owen Taylor3473f882001-02-23 17:55:21 +00006660 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006661 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006662 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006663 "Element declaration doesn't start and stop in"
6664 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006665 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006666
Owen Taylor3473f882001-02-23 17:55:21 +00006667 NEXT;
6668 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006669 (ctxt->sax->elementDecl != NULL)) {
6670 if (content != NULL)
6671 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006672 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6673 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006674 if ((content != NULL) && (content->parent == NULL)) {
6675 /*
6676 * this is a trick: if xmlAddElementDecl is called,
6677 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006678 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006679 * interfaces or change the API/ABI
6680 */
6681 xmlFreeDocElementContent(ctxt->myDoc, content);
6682 }
6683 } else if (content != NULL) {
6684 xmlFreeDocElementContent(ctxt->myDoc, content);
6685 }
Owen Taylor3473f882001-02-23 17:55:21 +00006686 }
Owen Taylor3473f882001-02-23 17:55:21 +00006687 }
6688 return(ret);
6689}
6690
6691/**
Owen Taylor3473f882001-02-23 17:55:21 +00006692 * xmlParseConditionalSections
6693 * @ctxt: an XML parser context
6694 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006695 * [61] conditionalSect ::= includeSect | ignoreSect
6696 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006697 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6698 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6699 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6700 */
6701
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006702static void
Owen Taylor3473f882001-02-23 17:55:21 +00006703xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006704 int *inputIds = NULL;
6705 size_t inputIdsSize = 0;
6706 size_t depth = 0;
Daniel Veillard49d44052008-08-27 19:57:06 +00006707
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006708 while (ctxt->instate != XML_PARSER_EOF) {
6709 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6710 int id = ctxt->input->id;
6711
6712 SKIP(3);
6713 SKIP_BLANKS;
6714
6715 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6716 SKIP(7);
6717 SKIP_BLANKS;
6718 if (RAW != '[') {
6719 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6720 xmlHaltParser(ctxt);
6721 goto error;
6722 }
6723 if (ctxt->input->id != id) {
6724 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6725 "All markup of the conditional section is"
6726 " not in the same entity\n");
6727 }
6728 NEXT;
6729
6730 if (inputIdsSize <= depth) {
6731 int *tmp;
6732
6733 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6734 tmp = (int *) xmlRealloc(inputIds,
6735 inputIdsSize * sizeof(int));
6736 if (tmp == NULL) {
6737 xmlErrMemory(ctxt, NULL);
6738 goto error;
6739 }
6740 inputIds = tmp;
6741 }
6742 inputIds[depth] = id;
6743 depth++;
6744 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6745 int state;
6746 xmlParserInputState instate;
6747 size_t ignoreDepth = 0;
6748
6749 SKIP(6);
6750 SKIP_BLANKS;
6751 if (RAW != '[') {
6752 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6753 xmlHaltParser(ctxt);
6754 goto error;
6755 }
6756 if (ctxt->input->id != id) {
6757 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6758 "All markup of the conditional section is"
6759 " not in the same entity\n");
6760 }
6761 NEXT;
6762
6763 /*
6764 * Parse up to the end of the conditional section but disable
6765 * SAX event generating DTD building in the meantime
6766 */
6767 state = ctxt->disableSAX;
6768 instate = ctxt->instate;
6769 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6770 ctxt->instate = XML_PARSER_IGNORE;
6771
6772 while (RAW != 0) {
6773 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6774 SKIP(3);
6775 ignoreDepth++;
6776 /* Check for integer overflow */
6777 if (ignoreDepth == 0) {
6778 xmlErrMemory(ctxt, NULL);
6779 goto error;
6780 }
6781 } else if ((RAW == ']') && (NXT(1) == ']') &&
6782 (NXT(2) == '>')) {
6783 if (ignoreDepth == 0)
6784 break;
6785 SKIP(3);
6786 ignoreDepth--;
6787 } else {
6788 NEXT;
6789 }
6790 }
6791
6792 ctxt->disableSAX = state;
6793 ctxt->instate = instate;
6794
6795 if (RAW == 0) {
6796 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6797 goto error;
6798 }
6799 if (ctxt->input->id != id) {
6800 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6801 "All markup of the conditional section is"
6802 " not in the same entity\n");
6803 }
6804 SKIP(3);
6805 } else {
6806 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6807 xmlHaltParser(ctxt);
6808 goto error;
6809 }
6810 } else if ((depth > 0) &&
6811 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6812 depth--;
6813 if (ctxt->input->id != inputIds[depth]) {
6814 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6815 "All markup of the conditional section is not"
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006816 " in the same entity\n");
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006817 }
6818 SKIP(3);
6819 } else {
6820 const xmlChar *check = CUR_PTR;
6821 unsigned int cons = ctxt->input->consumed;
6822
6823 xmlParseMarkupDecl(ctxt);
6824
6825 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6826 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6827 xmlHaltParser(ctxt);
6828 goto error;
6829 }
6830 }
6831
6832 if (depth == 0)
6833 break;
Owen Taylor3473f882001-02-23 17:55:21 +00006834
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006835 SKIP_BLANKS;
6836 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006837 }
6838
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006839error:
6840 xmlFree(inputIds);
Owen Taylor3473f882001-02-23 17:55:21 +00006841}
6842
6843/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006844 * xmlParseMarkupDecl:
6845 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006846 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006847 * parse Markup declarations
6848 *
6849 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6850 * NotationDecl | PI | Comment
6851 *
6852 * [ VC: Proper Declaration/PE Nesting ]
6853 * Parameter-entity replacement text must be properly nested with
6854 * markup declarations. That is to say, if either the first character
6855 * or the last character of a markup declaration (markupdecl above) is
6856 * contained in the replacement text for a parameter-entity reference,
6857 * both must be contained in the same replacement text.
6858 *
6859 * [ WFC: PEs in Internal Subset ]
6860 * In the internal DTD subset, parameter-entity references can occur
6861 * only where markup declarations can occur, not within markup declarations.
6862 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006863 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006864 */
6865void
6866xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6867 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006868 if (CUR == '<') {
6869 if (NXT(1) == '!') {
6870 switch (NXT(2)) {
6871 case 'E':
6872 if (NXT(3) == 'L')
6873 xmlParseElementDecl(ctxt);
6874 else if (NXT(3) == 'N')
6875 xmlParseEntityDecl(ctxt);
6876 break;
6877 case 'A':
6878 xmlParseAttributeListDecl(ctxt);
6879 break;
6880 case 'N':
6881 xmlParseNotationDecl(ctxt);
6882 break;
6883 case '-':
6884 xmlParseComment(ctxt);
6885 break;
6886 default:
6887 /* there is an error but it will be detected later */
6888 break;
6889 }
6890 } else if (NXT(1) == '?') {
6891 xmlParsePI(ctxt);
6892 }
6893 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006894
6895 /*
6896 * detect requirement to exit there and act accordingly
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006897 * and avoid having instate overridden later on
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006898 */
6899 if (ctxt->instate == XML_PARSER_EOF)
6900 return;
6901
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006902 ctxt->instate = XML_PARSER_DTD;
6903}
6904
6905/**
6906 * xmlParseTextDecl:
6907 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006908 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006909 * parse an XML declaration header for external entities
6910 *
6911 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006912 */
6913
6914void
6915xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6916 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006917 const xmlChar *encoding;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006918 int oldstate;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006919
6920 /*
6921 * We know that '<?xml' is here.
6922 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006923 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006924 SKIP(5);
6925 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006926 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006927 return;
6928 }
6929
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006930 /* Avoid expansion of parameter entities when skipping blanks. */
6931 oldstate = ctxt->instate;
6932 ctxt->instate = XML_PARSER_START;
6933
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006934 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6936 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006937 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006938
6939 /*
6940 * We may have the VersionInfo here.
6941 */
6942 version = xmlParseVersionInfo(ctxt);
6943 if (version == NULL)
6944 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006945 else {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006946 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006947 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6948 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006949 }
6950 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006951 ctxt->input->version = version;
6952
6953 /*
6954 * We must have the encoding declaration
6955 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006956 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006957 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6958 /*
6959 * The XML REC instructs us to stop parsing right here
6960 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006961 ctxt->instate = oldstate;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006962 return;
6963 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006964 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6965 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6966 "Missing encoding in text declaration\n");
6967 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006968
6969 SKIP_BLANKS;
6970 if ((RAW == '?') && (NXT(1) == '>')) {
6971 SKIP(2);
6972 } else if (RAW == '>') {
6973 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006974 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006975 NEXT;
6976 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006977 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006978 MOVETO_ENDTAG(CUR_PTR);
6979 NEXT;
6980 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006981
6982 ctxt->instate = oldstate;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006983}
6984
6985/**
Owen Taylor3473f882001-02-23 17:55:21 +00006986 * xmlParseExternalSubset:
6987 * @ctxt: an XML parser context
6988 * @ExternalID: the external identifier
6989 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006990 *
Owen Taylor3473f882001-02-23 17:55:21 +00006991 * parse Markup declarations from an external subset
6992 *
6993 * [30] extSubset ::= textDecl? extSubsetDecl
6994 *
6995 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6996 */
6997void
6998xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6999 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00007000 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007001 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007002
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007003 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007004 (ctxt->input->end - ctxt->input->cur >= 4)) {
7005 xmlChar start[4];
7006 xmlCharEncoding enc;
7007
7008 start[0] = RAW;
7009 start[1] = NXT(1);
7010 start[2] = NXT(2);
7011 start[3] = NXT(3);
7012 enc = xmlDetectCharEncoding(start, 4);
7013 if (enc != XML_CHAR_ENCODING_NONE)
7014 xmlSwitchEncoding(ctxt, enc);
7015 }
7016
Daniel Veillarda07050d2003-10-19 14:46:32 +00007017 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007018 xmlParseTextDecl(ctxt);
7019 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7020 /*
7021 * The XML REC instructs us to stop parsing right here
7022 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08007023 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007024 return;
7025 }
7026 }
7027 if (ctxt->myDoc == NULL) {
7028 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007029 if (ctxt->myDoc == NULL) {
7030 xmlErrMemory(ctxt, "New Doc failed");
7031 return;
7032 }
7033 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007034 }
7035 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7036 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7037
7038 ctxt->instate = XML_PARSER_DTD;
7039 ctxt->external = 1;
Nick Wellnhofer453dff12017-06-19 17:55:20 +02007040 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00007041 while (((RAW == '<') && (NXT(1) == '?')) ||
7042 ((RAW == '<') && (NXT(1) == '!')) ||
Nick Wellnhofer453dff12017-06-19 17:55:20 +02007043 (RAW == '%')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007044 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007045 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007046
7047 GROW;
7048 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7049 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007050 } else
7051 xmlParseMarkupDecl(ctxt);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02007052 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00007053
Daniel Veillardfdc91562002-07-01 21:52:03 +00007054 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007055 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007056 break;
7057 }
7058 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007059
Owen Taylor3473f882001-02-23 17:55:21 +00007060 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007061 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007062 }
7063
7064}
7065
7066/**
7067 * xmlParseReference:
7068 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007069 *
Owen Taylor3473f882001-02-23 17:55:21 +00007070 * parse and handle entity references in content, depending on the SAX
7071 * interface, this may end-up in a call to character() if this is a
7072 * CharRef, a predefined entity, if there is no reference() callback.
7073 * or if the parser was asked to switch to that mode.
7074 *
7075 * [67] Reference ::= EntityRef | CharRef
7076 */
7077void
7078xmlParseReference(xmlParserCtxtPtr ctxt) {
7079 xmlEntityPtr ent;
7080 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007081 int was_checked;
7082 xmlNodePtr list = NULL;
7083 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007084
Daniel Veillard0161e632008-08-28 15:36:32 +00007085
7086 if (RAW != '&')
7087 return;
7088
7089 /*
7090 * Simple case of a CharRef
7091 */
Owen Taylor3473f882001-02-23 17:55:21 +00007092 if (NXT(1) == '#') {
7093 int i = 0;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007094 xmlChar out[16];
Owen Taylor3473f882001-02-23 17:55:21 +00007095 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007096 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007097
Daniel Veillarddc171602008-03-26 17:41:38 +00007098 if (value == 0)
7099 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007100 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7101 /*
7102 * So we are using non-UTF-8 buffers
7103 * Check that the char fit on 8bits, if not
7104 * generate a CharRef.
7105 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007106 if (value <= 0xFF) {
7107 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007108 out[1] = 0;
7109 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7110 (!ctxt->disableSAX))
7111 ctxt->sax->characters(ctxt->userData, out, 1);
7112 } else {
7113 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007114 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007115 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007116 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007117 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7118 (!ctxt->disableSAX))
7119 ctxt->sax->reference(ctxt->userData, out);
7120 }
7121 } else {
7122 /*
7123 * Just encode the value in UTF-8
7124 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007125 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007126 out[i] = 0;
7127 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7128 (!ctxt->disableSAX))
7129 ctxt->sax->characters(ctxt->userData, out, i);
7130 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007131 return;
7132 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007133
Daniel Veillard0161e632008-08-28 15:36:32 +00007134 /*
7135 * We are seeing an entity reference
7136 */
7137 ent = xmlParseEntityRef(ctxt);
7138 if (ent == NULL) return;
7139 if (!ctxt->wellFormed)
7140 return;
7141 was_checked = ent->checked;
7142
7143 /* special case of predefined entities */
7144 if ((ent->name == NULL) ||
7145 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7146 val = ent->content;
7147 if (val == NULL) return;
7148 /*
7149 * inline the entity.
7150 */
7151 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7152 (!ctxt->disableSAX))
7153 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7154 return;
7155 }
7156
7157 /*
7158 * The first reference to the entity trigger a parsing phase
7159 * where the ent->children is filled with the result from
7160 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007161 * Note: external parsed entities will not be loaded, it is not
7162 * required for a non-validating parser, unless the parsing option
7163 * of validating, or substituting entities were given. Doing so is
7164 * far more secure as the parser will only process data coming from
7165 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007166 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007167 if (((ent->checked == 0) ||
7168 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007169 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7170 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007171 unsigned long oldnbent = ctxt->nbentities, diff;
Daniel Veillard0161e632008-08-28 15:36:32 +00007172
7173 /*
7174 * This is a bit hackish but this seems the best
7175 * way to make sure both SAX and DOM entity support
7176 * behaves okay.
7177 */
7178 void *user_data;
7179 if (ctxt->userData == ctxt)
7180 user_data = NULL;
7181 else
7182 user_data = ctxt->userData;
7183
7184 /*
7185 * Check that this entity is well formed
7186 * 4.3.2: An internal general parsed entity is well-formed
7187 * if its replacement text matches the production labeled
7188 * content.
7189 */
7190 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7191 ctxt->depth++;
7192 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7193 user_data, &list);
7194 ctxt->depth--;
7195
7196 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7197 ctxt->depth++;
7198 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7199 user_data, ctxt->depth, ent->URI,
7200 ent->ExternalID, &list);
7201 ctxt->depth--;
7202 } else {
7203 ret = XML_ERR_ENTITY_PE_INTERNAL;
7204 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7205 "invalid entity type found\n", NULL);
7206 }
7207
7208 /*
7209 * Store the number of entities needing parsing for this entity
7210 * content and do checkings
7211 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007212 diff = ctxt->nbentities - oldnbent + 1;
7213 if (diff > INT_MAX / 2)
7214 diff = INT_MAX / 2;
7215 ent->checked = diff * 2;
Daniel Veillardcff25462013-03-11 15:57:55 +08007216 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7217 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007218 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007219 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Haibo Huangd75f3892021-01-05 21:34:50 -08007220 xmlHaltParser(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007221 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007222 return;
7223 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007224 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007225 xmlFreeNodeList(list);
7226 return;
7227 }
Owen Taylor3473f882001-02-23 17:55:21 +00007228
Daniel Veillard0161e632008-08-28 15:36:32 +00007229 if ((ret == XML_ERR_OK) && (list != NULL)) {
7230 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7231 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7232 (ent->children == NULL)) {
7233 ent->children = list;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007234 /*
7235 * Prune it directly in the generated document
7236 * except for single text nodes.
7237 */
7238 if ((ctxt->replaceEntities == 0) ||
7239 (ctxt->parseMode == XML_PARSE_READER) ||
7240 ((list->type == XML_TEXT_NODE) &&
7241 (list->next == NULL))) {
7242 ent->owner = 1;
7243 while (list != NULL) {
7244 list->parent = (xmlNodePtr) ent;
7245 xmlSetTreeDoc(list, ent->doc);
7246 if (list->next == NULL)
7247 ent->last = list;
7248 list = list->next;
7249 }
7250 list = NULL;
7251 } else {
7252 ent->owner = 0;
7253 while (list != NULL) {
7254 list->parent = (xmlNodePtr) ctxt->node;
7255 list->doc = ctxt->myDoc;
7256 if (list->next == NULL)
7257 ent->last = list;
7258 list = list->next;
7259 }
7260 list = ent->children;
Daniel Veillard0161e632008-08-28 15:36:32 +00007261#ifdef LIBXML_LEGACY_ENABLED
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007262 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7263 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007264#endif /* LIBXML_LEGACY_ENABLED */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007265 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007266 } else {
7267 xmlFreeNodeList(list);
7268 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007269 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007270 } else if ((ret != XML_ERR_OK) &&
7271 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7272 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7273 "Entity '%s' failed to parse\n", ent->name);
Nick Wellnhofer60dded12018-01-22 15:04:58 +01007274 if (ent->content != NULL)
7275 ent->content[0] = 0;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007276 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007277 } else if (list != NULL) {
7278 xmlFreeNodeList(list);
7279 list = NULL;
7280 }
7281 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007282 ent->checked = 2;
David Kilzer3f0627a2017-06-16 21:30:42 +02007283
7284 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7285 was_checked = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007286 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007287 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007288 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007289
Daniel Veillard0161e632008-08-28 15:36:32 +00007290 /*
7291 * Now that the entity content has been gathered
7292 * provide it to the application, this can take different forms based
7293 * on the parsing modes.
7294 */
7295 if (ent->children == NULL) {
7296 /*
7297 * Probably running in SAX mode and the callbacks don't
7298 * build the entity content. So unless we already went
7299 * though parsing for first checking go though the entity
7300 * content to generate callbacks associated to the entity
7301 */
7302 if (was_checked != 0) {
7303 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007304 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007305 * This is a bit hackish but this seems the best
7306 * way to make sure both SAX and DOM entity support
7307 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007308 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007309 if (ctxt->userData == ctxt)
7310 user_data = NULL;
7311 else
7312 user_data = ctxt->userData;
7313
7314 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7315 ctxt->depth++;
7316 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7317 ent->content, user_data, NULL);
7318 ctxt->depth--;
7319 } else if (ent->etype ==
7320 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7321 ctxt->depth++;
7322 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7323 ctxt->sax, user_data, ctxt->depth,
7324 ent->URI, ent->ExternalID, NULL);
7325 ctxt->depth--;
7326 } else {
7327 ret = XML_ERR_ENTITY_PE_INTERNAL;
7328 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7329 "invalid entity type found\n", NULL);
7330 }
7331 if (ret == XML_ERR_ENTITY_LOOP) {
7332 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7333 return;
7334 }
7335 }
7336 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7337 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7338 /*
7339 * Entity reference callback comes second, it's somewhat
7340 * superfluous but a compatibility to historical behaviour
7341 */
7342 ctxt->sax->reference(ctxt->userData, ent->name);
7343 }
7344 return;
7345 }
7346
7347 /*
7348 * If we didn't get any children for the entity being built
7349 */
7350 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7351 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7352 /*
7353 * Create a node.
7354 */
7355 ctxt->sax->reference(ctxt->userData, ent->name);
7356 return;
7357 }
7358
7359 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7360 /*
7361 * There is a problem on the handling of _private for entities
7362 * (bug 155816): Should we copy the content of the field from
7363 * the entity (possibly overwriting some value set by the user
7364 * when a copy is created), should we leave it alone, or should
7365 * we try to take care of different situations? The problem
7366 * is exacerbated by the usage of this field by the xmlReader.
7367 * To fix this bug, we look at _private on the created node
7368 * and, if it's NULL, we copy in whatever was in the entity.
7369 * If it's not NULL we leave it alone. This is somewhat of a
7370 * hack - maybe we should have further tests to determine
7371 * what to do.
7372 */
7373 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7374 /*
7375 * Seems we are generating the DOM content, do
7376 * a simple tree copy for all references except the first
7377 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007378 */
7379 if (((list == NULL) && (ent->owner == 0)) ||
7380 (ctxt->parseMode == XML_PARSE_READER)) {
7381 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7382
7383 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007384 * We are copying here, make sure there is no abuse
7385 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007386 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007387 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7388 return;
7389
7390 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007391 * when operating on a reader, the entities definitions
7392 * are always owning the entities subtree.
7393 if (ctxt->parseMode == XML_PARSE_READER)
7394 ent->owner = 1;
7395 */
7396
7397 cur = ent->children;
7398 while (cur != NULL) {
7399 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7400 if (nw != NULL) {
7401 if (nw->_private == NULL)
7402 nw->_private = cur->_private;
7403 if (firstChild == NULL){
7404 firstChild = nw;
7405 }
7406 nw = xmlAddChild(ctxt->node, nw);
7407 }
7408 if (cur == ent->last) {
7409 /*
7410 * needed to detect some strange empty
7411 * node cases in the reader tests
7412 */
7413 if ((ctxt->parseMode == XML_PARSE_READER) &&
7414 (nw != NULL) &&
7415 (nw->type == XML_ELEMENT_NODE) &&
7416 (nw->children == NULL))
7417 nw->extra = 1;
7418
7419 break;
7420 }
7421 cur = cur->next;
7422 }
7423#ifdef LIBXML_LEGACY_ENABLED
7424 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7425 xmlAddEntityReference(ent, firstChild, nw);
7426#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007427 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007428 xmlNodePtr nw = NULL, cur, next, last,
7429 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007430
7431 /*
7432 * We are copying here, make sure there is no abuse
7433 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007434 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007435 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7436 return;
7437
Daniel Veillard0161e632008-08-28 15:36:32 +00007438 /*
7439 * Copy the entity child list and make it the new
7440 * entity child list. The goal is to make sure any
7441 * ID or REF referenced will be the one from the
7442 * document content and not the entity copy.
7443 */
7444 cur = ent->children;
7445 ent->children = NULL;
7446 last = ent->last;
7447 ent->last = NULL;
7448 while (cur != NULL) {
7449 next = cur->next;
7450 cur->next = NULL;
7451 cur->parent = NULL;
7452 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7453 if (nw != NULL) {
7454 if (nw->_private == NULL)
7455 nw->_private = cur->_private;
7456 if (firstChild == NULL){
7457 firstChild = cur;
7458 }
7459 xmlAddChild((xmlNodePtr) ent, nw);
7460 xmlAddChild(ctxt->node, cur);
7461 }
7462 if (cur == last)
7463 break;
7464 cur = next;
7465 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007466 if (ent->owner == 0)
7467 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007468#ifdef LIBXML_LEGACY_ENABLED
7469 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7470 xmlAddEntityReference(ent, firstChild, nw);
7471#endif /* LIBXML_LEGACY_ENABLED */
7472 } else {
7473 const xmlChar *nbktext;
7474
7475 /*
7476 * the name change is to avoid coalescing of the
7477 * node with a possible previous text one which
7478 * would make ent->children a dangling pointer
7479 */
7480 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7481 -1);
7482 if (ent->children->type == XML_TEXT_NODE)
7483 ent->children->name = nbktext;
7484 if ((ent->last != ent->children) &&
7485 (ent->last->type == XML_TEXT_NODE))
7486 ent->last->name = nbktext;
7487 xmlAddChildList(ctxt->node, ent->children);
7488 }
7489
7490 /*
7491 * This is to avoid a nasty side effect, see
7492 * characters() in SAX.c
7493 */
7494 ctxt->nodemem = 0;
7495 ctxt->nodelen = 0;
7496 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007497 }
7498 }
7499}
7500
7501/**
7502 * xmlParseEntityRef:
7503 * @ctxt: an XML parser context
7504 *
7505 * parse ENTITY references declarations
7506 *
7507 * [68] EntityRef ::= '&' Name ';'
7508 *
7509 * [ WFC: Entity Declared ]
7510 * In a document without any DTD, a document with only an internal DTD
7511 * subset which contains no parameter entity references, or a document
7512 * with "standalone='yes'", the Name given in the entity reference
7513 * must match that in an entity declaration, except that well-formed
7514 * documents need not declare any of the following entities: amp, lt,
7515 * gt, apos, quot. The declaration of a parameter entity must precede
7516 * any reference to it. Similarly, the declaration of a general entity
7517 * must precede any reference to it which appears in a default value in an
7518 * attribute-list declaration. Note that if entities are declared in the
7519 * external subset or in external parameter entities, a non-validating
7520 * processor is not obligated to read and process their declarations;
7521 * for such documents, the rule that an entity must be declared is a
7522 * well-formedness constraint only if standalone='yes'.
7523 *
7524 * [ WFC: Parsed Entity ]
7525 * An entity reference must not contain the name of an unparsed entity
7526 *
7527 * Returns the xmlEntityPtr if found, or NULL otherwise.
7528 */
7529xmlEntityPtr
7530xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007531 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007532 xmlEntityPtr ent = NULL;
7533
7534 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007535 if (ctxt->instate == XML_PARSER_EOF)
7536 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007537
Daniel Veillard0161e632008-08-28 15:36:32 +00007538 if (RAW != '&')
7539 return(NULL);
7540 NEXT;
7541 name = xmlParseName(ctxt);
7542 if (name == NULL) {
7543 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7544 "xmlParseEntityRef: no name\n");
7545 return(NULL);
7546 }
7547 if (RAW != ';') {
7548 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7549 return(NULL);
7550 }
7551 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007552
Daniel Veillard0161e632008-08-28 15:36:32 +00007553 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007554 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007555 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007556 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7557 ent = xmlGetPredefinedEntity(name);
7558 if (ent != NULL)
7559 return(ent);
7560 }
Owen Taylor3473f882001-02-23 17:55:21 +00007561
Daniel Veillard0161e632008-08-28 15:36:32 +00007562 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007563 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007564 */
7565 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007566
Daniel Veillard0161e632008-08-28 15:36:32 +00007567 /*
7568 * Ask first SAX for entity resolution, otherwise try the
7569 * entities which may have stored in the parser context.
7570 */
7571 if (ctxt->sax != NULL) {
7572 if (ctxt->sax->getEntity != NULL)
7573 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007574 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007575 (ctxt->options & XML_PARSE_OLDSAX))
7576 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007577 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7578 (ctxt->userData==ctxt)) {
7579 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007580 }
7581 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007582 if (ctxt->instate == XML_PARSER_EOF)
7583 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007584 /*
7585 * [ WFC: Entity Declared ]
7586 * In a document without any DTD, a document with only an
7587 * internal DTD subset which contains no parameter entity
7588 * references, or a document with "standalone='yes'", the
7589 * Name given in the entity reference must match that in an
7590 * entity declaration, except that well-formed documents
7591 * need not declare any of the following entities: amp, lt,
7592 * gt, apos, quot.
7593 * The declaration of a parameter entity must precede any
7594 * reference to it.
7595 * Similarly, the declaration of a general entity must
7596 * precede any reference to it which appears in a default
7597 * value in an attribute-list declaration. Note that if
7598 * entities are declared in the external subset or in
7599 * external parameter entities, a non-validating processor
7600 * is not obligated to read and process their declarations;
7601 * for such documents, the rule that an entity must be
7602 * declared is a well-formedness constraint only if
7603 * standalone='yes'.
7604 */
7605 if (ent == NULL) {
7606 if ((ctxt->standalone == 1) ||
7607 ((ctxt->hasExternalSubset == 0) &&
7608 (ctxt->hasPErefs == 0))) {
7609 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7610 "Entity '%s' not defined\n", name);
7611 } else {
7612 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7613 "Entity '%s' not defined\n", name);
7614 if ((ctxt->inSubset == 0) &&
7615 (ctxt->sax != NULL) &&
7616 (ctxt->sax->reference != NULL)) {
7617 ctxt->sax->reference(ctxt->userData, name);
7618 }
7619 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007620 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007621 ctxt->valid = 0;
7622 }
7623
7624 /*
7625 * [ WFC: Parsed Entity ]
7626 * An entity reference must not contain the name of an
7627 * unparsed entity
7628 */
7629 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7630 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7631 "Entity reference to unparsed entity %s\n", name);
7632 }
7633
7634 /*
7635 * [ WFC: No External Entity References ]
7636 * Attribute values cannot contain direct or indirect
7637 * entity references to external entities.
7638 */
7639 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7640 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7641 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7642 "Attribute references external entity '%s'\n", name);
7643 }
7644 /*
7645 * [ WFC: No < in Attribute Values ]
7646 * The replacement text of any entity referred to directly or
7647 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007648 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007649 */
7650 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007651 (ent != NULL) &&
7652 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007653 if (((ent->checked & 1) || (ent->checked == 0)) &&
7654 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007655 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7656 "'<' in entity '%s' is not allowed in attributes values\n", name);
7657 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007658 }
7659
7660 /*
7661 * Internal check, no parameter entities here ...
7662 */
7663 else {
7664 switch (ent->etype) {
7665 case XML_INTERNAL_PARAMETER_ENTITY:
7666 case XML_EXTERNAL_PARAMETER_ENTITY:
7667 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7668 "Attempt to reference the parameter entity '%s'\n",
7669 name);
7670 break;
7671 default:
7672 break;
7673 }
7674 }
7675
7676 /*
7677 * [ WFC: No Recursion ]
7678 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007679 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007680 * Done somewhere else
7681 */
Owen Taylor3473f882001-02-23 17:55:21 +00007682 return(ent);
7683}
7684
7685/**
7686 * xmlParseStringEntityRef:
7687 * @ctxt: an XML parser context
7688 * @str: a pointer to an index in the string
7689 *
7690 * parse ENTITY references declarations, but this version parses it from
7691 * a string value.
7692 *
7693 * [68] EntityRef ::= '&' Name ';'
7694 *
7695 * [ WFC: Entity Declared ]
7696 * In a document without any DTD, a document with only an internal DTD
7697 * subset which contains no parameter entity references, or a document
7698 * with "standalone='yes'", the Name given in the entity reference
7699 * must match that in an entity declaration, except that well-formed
7700 * documents need not declare any of the following entities: amp, lt,
7701 * gt, apos, quot. The declaration of a parameter entity must precede
7702 * any reference to it. Similarly, the declaration of a general entity
7703 * must precede any reference to it which appears in a default value in an
7704 * attribute-list declaration. Note that if entities are declared in the
7705 * external subset or in external parameter entities, a non-validating
7706 * processor is not obligated to read and process their declarations;
7707 * for such documents, the rule that an entity must be declared is a
7708 * well-formedness constraint only if standalone='yes'.
7709 *
7710 * [ WFC: Parsed Entity ]
7711 * An entity reference must not contain the name of an unparsed entity
7712 *
7713 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7714 * is updated to the current location in the string.
7715 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007716static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007717xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7718 xmlChar *name;
7719 const xmlChar *ptr;
7720 xmlChar cur;
7721 xmlEntityPtr ent = NULL;
7722
7723 if ((str == NULL) || (*str == NULL))
7724 return(NULL);
7725 ptr = *str;
7726 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007727 if (cur != '&')
7728 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007729
Daniel Veillard0161e632008-08-28 15:36:32 +00007730 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007731 name = xmlParseStringName(ctxt, &ptr);
7732 if (name == NULL) {
7733 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7734 "xmlParseStringEntityRef: no name\n");
7735 *str = ptr;
7736 return(NULL);
7737 }
7738 if (*ptr != ';') {
7739 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007740 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007741 *str = ptr;
7742 return(NULL);
7743 }
7744 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007745
Owen Taylor3473f882001-02-23 17:55:21 +00007746
Daniel Veillard0161e632008-08-28 15:36:32 +00007747 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007748 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007749 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007750 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7751 ent = xmlGetPredefinedEntity(name);
7752 if (ent != NULL) {
7753 xmlFree(name);
7754 *str = ptr;
7755 return(ent);
7756 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007757 }
Owen Taylor3473f882001-02-23 17:55:21 +00007758
Daniel Veillard0161e632008-08-28 15:36:32 +00007759 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007760 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007761 */
7762 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007763
Daniel Veillard0161e632008-08-28 15:36:32 +00007764 /*
7765 * Ask first SAX for entity resolution, otherwise try the
7766 * entities which may have stored in the parser context.
7767 */
7768 if (ctxt->sax != NULL) {
7769 if (ctxt->sax->getEntity != NULL)
7770 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007771 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7772 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007773 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7774 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007775 }
7776 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007777 if (ctxt->instate == XML_PARSER_EOF) {
7778 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007779 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007780 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007781
7782 /*
7783 * [ WFC: Entity Declared ]
7784 * In a document without any DTD, a document with only an
7785 * internal DTD subset which contains no parameter entity
7786 * references, or a document with "standalone='yes'", the
7787 * Name given in the entity reference must match that in an
7788 * entity declaration, except that well-formed documents
7789 * need not declare any of the following entities: amp, lt,
7790 * gt, apos, quot.
7791 * The declaration of a parameter entity must precede any
7792 * reference to it.
7793 * Similarly, the declaration of a general entity must
7794 * precede any reference to it which appears in a default
7795 * value in an attribute-list declaration. Note that if
7796 * entities are declared in the external subset or in
7797 * external parameter entities, a non-validating processor
7798 * is not obligated to read and process their declarations;
7799 * for such documents, the rule that an entity must be
7800 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007801 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007802 */
7803 if (ent == NULL) {
7804 if ((ctxt->standalone == 1) ||
7805 ((ctxt->hasExternalSubset == 0) &&
7806 (ctxt->hasPErefs == 0))) {
7807 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7808 "Entity '%s' not defined\n", name);
7809 } else {
7810 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7811 "Entity '%s' not defined\n",
7812 name);
7813 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007814 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007815 /* TODO ? check regressions ctxt->valid = 0; */
7816 }
7817
7818 /*
7819 * [ WFC: Parsed Entity ]
7820 * An entity reference must not contain the name of an
7821 * unparsed entity
7822 */
7823 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7824 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7825 "Entity reference to unparsed entity %s\n", name);
7826 }
7827
7828 /*
7829 * [ WFC: No External Entity References ]
7830 * Attribute values cannot contain direct or indirect
7831 * entity references to external entities.
7832 */
7833 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7834 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7835 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7836 "Attribute references external entity '%s'\n", name);
7837 }
7838 /*
7839 * [ WFC: No < in Attribute Values ]
7840 * The replacement text of any entity referred to directly or
7841 * indirectly in an attribute value (other than "&lt;") must
7842 * not contain a <.
7843 */
7844 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7845 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007846 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007847 (xmlStrchr(ent->content, '<'))) {
7848 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7849 "'<' in entity '%s' is not allowed in attributes values\n",
7850 name);
7851 }
7852
7853 /*
7854 * Internal check, no parameter entities here ...
7855 */
7856 else {
7857 switch (ent->etype) {
7858 case XML_INTERNAL_PARAMETER_ENTITY:
7859 case XML_EXTERNAL_PARAMETER_ENTITY:
7860 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7861 "Attempt to reference the parameter entity '%s'\n",
7862 name);
7863 break;
7864 default:
7865 break;
7866 }
7867 }
7868
7869 /*
7870 * [ WFC: No Recursion ]
7871 * A parsed entity must not contain a recursive reference
7872 * to itself, either directly or indirectly.
7873 * Done somewhere else
7874 */
7875
7876 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007877 *str = ptr;
7878 return(ent);
7879}
7880
7881/**
7882 * xmlParsePEReference:
7883 * @ctxt: an XML parser context
7884 *
7885 * parse PEReference declarations
7886 * The entity content is handled directly by pushing it's content as
7887 * a new input stream.
7888 *
7889 * [69] PEReference ::= '%' Name ';'
7890 *
7891 * [ WFC: No Recursion ]
7892 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007893 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007894 *
7895 * [ WFC: Entity Declared ]
7896 * In a document without any DTD, a document with only an internal DTD
7897 * subset which contains no parameter entity references, or a document
7898 * with "standalone='yes'", ... ... The declaration of a parameter
7899 * entity must precede any reference to it...
7900 *
7901 * [ VC: Entity Declared ]
7902 * In a document with an external subset or external parameter entities
7903 * with "standalone='no'", ... ... The declaration of a parameter entity
7904 * must precede any reference to it...
7905 *
7906 * [ WFC: In DTD ]
7907 * Parameter-entity references may only appear in the DTD.
7908 * NOTE: misleading but this is handled.
7909 */
7910void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007911xmlParsePEReference(xmlParserCtxtPtr ctxt)
7912{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007913 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007914 xmlEntityPtr entity = NULL;
7915 xmlParserInputPtr input;
7916
Daniel Veillard0161e632008-08-28 15:36:32 +00007917 if (RAW != '%')
7918 return;
7919 NEXT;
7920 name = xmlParseName(ctxt);
7921 if (name == NULL) {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007922 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
Daniel Veillard0161e632008-08-28 15:36:32 +00007923 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007924 }
Nick Wellnhofer03904152017-06-05 21:16:00 +02007925 if (xmlParserDebugEntities)
7926 xmlGenericError(xmlGenericErrorContext,
7927 "PEReference: %s\n", name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007928 if (RAW != ';') {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007929 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007930 return;
7931 }
7932
7933 NEXT;
7934
7935 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007936 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007937 */
7938 ctxt->nbentities++;
7939
7940 /*
7941 * Request the entity from SAX
7942 */
7943 if ((ctxt->sax != NULL) &&
7944 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007945 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7946 if (ctxt->instate == XML_PARSER_EOF)
7947 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007948 if (entity == NULL) {
7949 /*
7950 * [ WFC: Entity Declared ]
7951 * In a document without any DTD, a document with only an
7952 * internal DTD subset which contains no parameter entity
7953 * references, or a document with "standalone='yes'", ...
7954 * ... The declaration of a parameter entity must precede
7955 * any reference to it...
7956 */
7957 if ((ctxt->standalone == 1) ||
7958 ((ctxt->hasExternalSubset == 0) &&
7959 (ctxt->hasPErefs == 0))) {
7960 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7961 "PEReference: %%%s; not found\n",
7962 name);
7963 } else {
7964 /*
7965 * [ VC: Entity Declared ]
7966 * In a document with an external subset or external
7967 * parameter entities with "standalone='no'", ...
7968 * ... The declaration of a parameter entity must
7969 * precede any reference to it...
7970 */
Nick Wellnhofer03904152017-06-05 21:16:00 +02007971 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7972 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7973 "PEReference: %%%s; not found\n",
7974 name, NULL);
7975 } else
7976 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7977 "PEReference: %%%s; not found\n",
7978 name, NULL);
7979 ctxt->valid = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007980 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007981 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007982 } else {
7983 /*
7984 * Internal checking in case the entity quest barfed
7985 */
7986 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7987 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7988 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7989 "Internal: %%%s; is not a parameter entity\n",
7990 name, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007991 } else {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007992 xmlChar start[4];
7993 xmlCharEncoding enc;
7994
Elliott Hughese54f00d2021-05-13 08:13:46 -07007995 if (xmlParserEntityCheck(ctxt, 0, entity, 0))
7996 return;
7997
Neel Mehta90ccb582017-04-07 17:43:02 +02007998 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7999 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8000 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8001 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8002 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8003 (ctxt->replaceEntities == 0) &&
8004 (ctxt->validate == 0))
8005 return;
8006
Daniel Veillard0161e632008-08-28 15:36:32 +00008007 input = xmlNewEntityInputStream(ctxt, entity);
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02008008 if (xmlPushInput(ctxt, input) < 0) {
8009 xmlFreeInputStream(input);
Daniel Veillard0161e632008-08-28 15:36:32 +00008010 return;
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02008011 }
Nick Wellnhofer46dc9892017-06-08 02:24:56 +02008012
8013 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8014 /*
8015 * Get the 4 first bytes and decode the charset
8016 * if enc != XML_CHAR_ENCODING_NONE
8017 * plug some encoding conversion routines.
8018 * Note that, since we may have some non-UTF8
8019 * encoding (like UTF16, bug 135229), the 'length'
8020 * is not known, but we can calculate based upon
8021 * the amount of data in the buffer.
8022 */
8023 GROW
8024 if (ctxt->instate == XML_PARSER_EOF)
8025 return;
8026 if ((ctxt->input->end - ctxt->input->cur)>=4) {
8027 start[0] = RAW;
8028 start[1] = NXT(1);
8029 start[2] = NXT(2);
8030 start[3] = NXT(3);
8031 enc = xmlDetectCharEncoding(start, 4);
8032 if (enc != XML_CHAR_ENCODING_NONE) {
8033 xmlSwitchEncoding(ctxt, enc);
8034 }
8035 }
8036
8037 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8038 (IS_BLANK_CH(NXT(5)))) {
8039 xmlParseTextDecl(ctxt);
Nick Wellnhofer03904152017-06-05 21:16:00 +02008040 }
8041 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008042 }
8043 }
8044 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008045}
8046
8047/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008048 * xmlLoadEntityContent:
8049 * @ctxt: an XML parser context
8050 * @entity: an unloaded system entity
8051 *
8052 * Load the original content of the given system entity from the
8053 * ExternalID/SystemID given. This is to be used for Included in Literal
8054 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8055 *
8056 * Returns 0 in case of success and -1 in case of failure
8057 */
8058static int
8059xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8060 xmlParserInputPtr input;
8061 xmlBufferPtr buf;
8062 int l, c;
8063 int count = 0;
8064
8065 if ((ctxt == NULL) || (entity == NULL) ||
8066 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8067 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8068 (entity->content != NULL)) {
8069 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8070 "xmlLoadEntityContent parameter error");
8071 return(-1);
8072 }
8073
8074 if (xmlParserDebugEntities)
8075 xmlGenericError(xmlGenericErrorContext,
8076 "Reading %s entity content input\n", entity->name);
8077
8078 buf = xmlBufferCreate();
8079 if (buf == NULL) {
8080 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8081 "xmlLoadEntityContent parameter error");
8082 return(-1);
8083 }
8084
8085 input = xmlNewEntityInputStream(ctxt, entity);
8086 if (input == NULL) {
8087 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8088 "xmlLoadEntityContent input error");
8089 xmlBufferFree(buf);
8090 return(-1);
8091 }
8092
8093 /*
8094 * Push the entity as the current input, read char by char
8095 * saving to the buffer until the end of the entity or an error
8096 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008097 if (xmlPushInput(ctxt, input) < 0) {
8098 xmlBufferFree(buf);
8099 return(-1);
8100 }
8101
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008102 GROW;
8103 c = CUR_CHAR(l);
8104 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8105 (IS_CHAR(c))) {
8106 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008107 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008108 count = 0;
8109 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008110 if (ctxt->instate == XML_PARSER_EOF) {
8111 xmlBufferFree(buf);
8112 return(-1);
8113 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008114 }
8115 NEXTL(l);
8116 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008117 if (c == 0) {
8118 count = 0;
8119 GROW;
8120 if (ctxt->instate == XML_PARSER_EOF) {
8121 xmlBufferFree(buf);
8122 return(-1);
8123 }
8124 c = CUR_CHAR(l);
8125 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008126 }
8127
8128 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8129 xmlPopInput(ctxt);
8130 } else if (!IS_CHAR(c)) {
8131 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8132 "xmlLoadEntityContent: invalid char value %d\n",
8133 c);
8134 xmlBufferFree(buf);
8135 return(-1);
8136 }
8137 entity->content = buf->content;
8138 buf->content = NULL;
8139 xmlBufferFree(buf);
8140
8141 return(0);
8142}
8143
8144/**
Owen Taylor3473f882001-02-23 17:55:21 +00008145 * xmlParseStringPEReference:
8146 * @ctxt: an XML parser context
8147 * @str: a pointer to an index in the string
8148 *
8149 * parse PEReference declarations
8150 *
8151 * [69] PEReference ::= '%' Name ';'
8152 *
8153 * [ WFC: No Recursion ]
8154 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008155 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008156 *
8157 * [ WFC: Entity Declared ]
8158 * In a document without any DTD, a document with only an internal DTD
8159 * subset which contains no parameter entity references, or a document
8160 * with "standalone='yes'", ... ... The declaration of a parameter
8161 * entity must precede any reference to it...
8162 *
8163 * [ VC: Entity Declared ]
8164 * In a document with an external subset or external parameter entities
8165 * with "standalone='no'", ... ... The declaration of a parameter entity
8166 * must precede any reference to it...
8167 *
8168 * [ WFC: In DTD ]
8169 * Parameter-entity references may only appear in the DTD.
8170 * NOTE: misleading but this is handled.
8171 *
8172 * Returns the string of the entity content.
8173 * str is updated to the current value of the index
8174 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008175static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008176xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8177 const xmlChar *ptr;
8178 xmlChar cur;
8179 xmlChar *name;
8180 xmlEntityPtr entity = NULL;
8181
8182 if ((str == NULL) || (*str == NULL)) return(NULL);
8183 ptr = *str;
8184 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008185 if (cur != '%')
8186 return(NULL);
8187 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008188 name = xmlParseStringName(ctxt, &ptr);
8189 if (name == NULL) {
8190 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8191 "xmlParseStringPEReference: no name\n");
8192 *str = ptr;
8193 return(NULL);
8194 }
8195 cur = *ptr;
8196 if (cur != ';') {
8197 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8198 xmlFree(name);
8199 *str = ptr;
8200 return(NULL);
8201 }
8202 ptr++;
8203
8204 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008205 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00008206 */
8207 ctxt->nbentities++;
8208
8209 /*
8210 * Request the entity from SAX
8211 */
8212 if ((ctxt->sax != NULL) &&
8213 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008214 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8215 if (ctxt->instate == XML_PARSER_EOF) {
8216 xmlFree(name);
Nick Wellnhoferfb2f5182017-06-10 17:06:16 +02008217 *str = ptr;
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008218 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008219 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008220 if (entity == NULL) {
8221 /*
8222 * [ WFC: Entity Declared ]
8223 * In a document without any DTD, a document with only an
8224 * internal DTD subset which contains no parameter entity
8225 * references, or a document with "standalone='yes'", ...
8226 * ... The declaration of a parameter entity must precede
8227 * any reference to it...
8228 */
8229 if ((ctxt->standalone == 1) ||
8230 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8231 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8232 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008233 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008234 /*
8235 * [ VC: Entity Declared ]
8236 * In a document with an external subset or external
8237 * parameter entities with "standalone='no'", ...
8238 * ... The declaration of a parameter entity must
8239 * precede any reference to it...
8240 */
8241 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8242 "PEReference: %%%s; not found\n",
8243 name, NULL);
8244 ctxt->valid = 0;
8245 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008246 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008247 } else {
8248 /*
8249 * Internal checking in case the entity quest barfed
8250 */
8251 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8252 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8253 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8254 "%%%s; is not a parameter entity\n",
8255 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008256 }
8257 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008258 ctxt->hasPErefs = 1;
8259 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008260 *str = ptr;
8261 return(entity);
8262}
8263
8264/**
8265 * xmlParseDocTypeDecl:
8266 * @ctxt: an XML parser context
8267 *
8268 * parse a DOCTYPE declaration
8269 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008270 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008271 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8272 *
8273 * [ VC: Root Element Type ]
8274 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008275 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008276 */
8277
8278void
8279xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008280 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008281 xmlChar *ExternalID = NULL;
8282 xmlChar *URI = NULL;
8283
8284 /*
8285 * We know that '<!DOCTYPE' has been detected.
8286 */
8287 SKIP(9);
8288
8289 SKIP_BLANKS;
8290
8291 /*
8292 * Parse the DOCTYPE name.
8293 */
8294 name = xmlParseName(ctxt);
8295 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008296 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8297 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008298 }
8299 ctxt->intSubName = name;
8300
8301 SKIP_BLANKS;
8302
8303 /*
8304 * Check for SystemID and ExternalID
8305 */
8306 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8307
8308 if ((URI != NULL) || (ExternalID != NULL)) {
8309 ctxt->hasExternalSubset = 1;
8310 }
8311 ctxt->extSubURI = URI;
8312 ctxt->extSubSystem = ExternalID;
8313
8314 SKIP_BLANKS;
8315
8316 /*
8317 * Create and update the internal subset.
8318 */
8319 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8320 (!ctxt->disableSAX))
8321 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008322 if (ctxt->instate == XML_PARSER_EOF)
8323 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008324
8325 /*
8326 * Is there any internal subset declarations ?
8327 * they are handled separately in xmlParseInternalSubset()
8328 */
8329 if (RAW == '[')
8330 return;
8331
8332 /*
8333 * We should be at the end of the DOCTYPE declaration.
8334 */
8335 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008336 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008337 }
8338 NEXT;
8339}
8340
8341/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008342 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008343 * @ctxt: an XML parser context
8344 *
8345 * parse the internal subset declaration
8346 *
8347 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8348 */
8349
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008350static void
Owen Taylor3473f882001-02-23 17:55:21 +00008351xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8352 /*
8353 * Is there any DTD definition ?
8354 */
8355 if (RAW == '[') {
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008356 int baseInputNr = ctxt->inputNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008357 ctxt->instate = XML_PARSER_DTD;
8358 NEXT;
8359 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008360 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008361 * PEReferences.
8362 * Subsequence (markupdecl | PEReference | S)*
8363 */
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008364 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
Nick Wellnhofer453dff12017-06-19 17:55:20 +02008365 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008366 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008367 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008368
8369 SKIP_BLANKS;
8370 xmlParseMarkupDecl(ctxt);
8371 xmlParsePEReference(ctxt);
8372
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008373 /*
8374 * Conditional sections are allowed from external entities included
8375 * by PE References in the internal subset.
8376 */
8377 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8378 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8379 xmlParseConditionalSections(ctxt);
8380 }
8381
Owen Taylor3473f882001-02-23 17:55:21 +00008382 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008383 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008384 "xmlParseInternalSubset: error detected in Markup declaration\n");
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008385 if (ctxt->inputNr > baseInputNr)
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02008386 xmlPopInput(ctxt);
8387 else
8388 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008389 }
8390 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008391 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008392 NEXT;
8393 SKIP_BLANKS;
8394 }
8395 }
8396
8397 /*
8398 * We should be at the end of the DOCTYPE declaration.
8399 */
8400 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008401 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Daniel Veillarda7a94612016-02-09 12:55:29 +01008402 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008403 }
8404 NEXT;
8405}
8406
Daniel Veillard81273902003-09-30 00:43:48 +00008407#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008408/**
8409 * xmlParseAttribute:
8410 * @ctxt: an XML parser context
8411 * @value: a xmlChar ** used to store the value of the attribute
8412 *
8413 * parse an attribute
8414 *
8415 * [41] Attribute ::= Name Eq AttValue
8416 *
8417 * [ WFC: No External Entity References ]
8418 * Attribute values cannot contain direct or indirect entity references
8419 * to external entities.
8420 *
8421 * [ WFC: No < in Attribute Values ]
8422 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008423 * an attribute value (other than "&lt;") must not contain a <.
8424 *
Owen Taylor3473f882001-02-23 17:55:21 +00008425 * [ VC: Attribute Value Type ]
8426 * The attribute must have been declared; the value must be of the type
8427 * declared for it.
8428 *
8429 * [25] Eq ::= S? '=' S?
8430 *
8431 * With namespace:
8432 *
8433 * [NS 11] Attribute ::= QName Eq AttValue
8434 *
8435 * Also the case QName == xmlns:??? is handled independently as a namespace
8436 * definition.
8437 *
8438 * Returns the attribute name, and the value in *value.
8439 */
8440
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008441const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008442xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008443 const xmlChar *name;
8444 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008445
8446 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008447 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008448 name = xmlParseName(ctxt);
8449 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008450 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008451 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008452 return(NULL);
8453 }
8454
8455 /*
8456 * read the value
8457 */
8458 SKIP_BLANKS;
8459 if (RAW == '=') {
8460 NEXT;
8461 SKIP_BLANKS;
8462 val = xmlParseAttValue(ctxt);
8463 ctxt->instate = XML_PARSER_CONTENT;
8464 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008465 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02008466 "Specification mandates value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008467 return(NULL);
8468 }
8469
8470 /*
8471 * Check that xml:lang conforms to the specification
8472 * No more registered as an error, just generate a warning now
8473 * since this was deprecated in XML second edition
8474 */
8475 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8476 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008477 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8478 "Malformed value for xml:lang : %s\n",
8479 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008480 }
8481 }
8482
8483 /*
8484 * Check that xml:space conforms to the specification
8485 */
8486 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8487 if (xmlStrEqual(val, BAD_CAST "default"))
8488 *(ctxt->space) = 0;
8489 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8490 *(ctxt->space) = 1;
8491 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008492 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008493"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008494 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008495 }
8496 }
8497
8498 *value = val;
8499 return(name);
8500}
8501
8502/**
8503 * xmlParseStartTag:
8504 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008505 *
Owen Taylor3473f882001-02-23 17:55:21 +00008506 * parse a start of tag either for rule element or
8507 * EmptyElement. In both case we don't parse the tag closing chars.
8508 *
8509 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8510 *
8511 * [ WFC: Unique Att Spec ]
8512 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008513 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008514 *
8515 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8516 *
8517 * [ WFC: Unique Att Spec ]
8518 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008519 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008520 *
8521 * With namespace:
8522 *
8523 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8524 *
8525 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8526 *
8527 * Returns the element name parsed
8528 */
8529
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008530const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008531xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008532 const xmlChar *name;
8533 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008534 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008535 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008536 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008537 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008538 int i;
8539
8540 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008541 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008542
8543 name = xmlParseName(ctxt);
8544 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008545 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008546 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008547 return(NULL);
8548 }
8549
8550 /*
8551 * Now parse the attributes, it ends up with the ending
8552 *
8553 * (S Attribute)* S?
8554 */
8555 SKIP_BLANKS;
8556 GROW;
8557
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008558 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008559 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008560 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008561 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008562 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008563
8564 attname = xmlParseAttribute(ctxt, &attvalue);
8565 if ((attname != NULL) && (attvalue != NULL)) {
8566 /*
8567 * [ WFC: Unique Att Spec ]
8568 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008569 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008570 */
8571 for (i = 0; i < nbatts;i += 2) {
8572 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008573 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008574 xmlFree(attvalue);
8575 goto failed;
8576 }
8577 }
Owen Taylor3473f882001-02-23 17:55:21 +00008578 /*
8579 * Add the pair to atts
8580 */
8581 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008582 maxatts = 22; /* allow for 10 attrs by default */
8583 atts = (const xmlChar **)
8584 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008585 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008586 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008587 if (attvalue != NULL)
8588 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008589 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008590 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008591 ctxt->atts = atts;
8592 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008593 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008594 const xmlChar **n;
8595
Owen Taylor3473f882001-02-23 17:55:21 +00008596 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008597 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008598 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008599 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008600 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008601 if (attvalue != NULL)
8602 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008603 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008604 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008605 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008606 ctxt->atts = atts;
8607 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008608 }
8609 atts[nbatts++] = attname;
8610 atts[nbatts++] = attvalue;
8611 atts[nbatts] = NULL;
8612 atts[nbatts + 1] = NULL;
8613 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008614 if (attvalue != NULL)
8615 xmlFree(attvalue);
8616 }
8617
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008618failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008619
Daniel Veillard3772de32002-12-17 10:31:45 +00008620 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008621 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8622 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02008623 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008624 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8625 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008626 }
Daniel Veillard02111c12003-02-24 19:14:52 +00008627 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8628 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008629 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8630 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008631 break;
8632 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008633 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008634 GROW;
8635 }
8636
8637 /*
8638 * SAX: Start of Element !
8639 */
8640 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008641 (!ctxt->disableSAX)) {
8642 if (nbatts > 0)
8643 ctxt->sax->startElement(ctxt->userData, name, atts);
8644 else
8645 ctxt->sax->startElement(ctxt->userData, name, NULL);
8646 }
Owen Taylor3473f882001-02-23 17:55:21 +00008647
8648 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008649 /* Free only the content strings */
8650 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008651 if (atts[i] != NULL)
8652 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008653 }
8654 return(name);
8655}
8656
8657/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008658 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008659 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008660 * @line: line of the start tag
8661 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008662 *
8663 * parse an end of tag
8664 *
8665 * [42] ETag ::= '</' Name S? '>'
8666 *
8667 * With namespace
8668 *
8669 * [NS 9] ETag ::= '</' QName S? '>'
8670 */
8671
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008672static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008673xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008674 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008675
8676 GROW;
8677 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008678 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008679 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008680 return;
8681 }
8682 SKIP(2);
8683
Daniel Veillard46de64e2002-05-29 08:21:33 +00008684 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008685
8686 /*
8687 * We should definitely be at the ending "S? '>'" part
8688 */
8689 GROW;
8690 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008691 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008692 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008693 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008694 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008695
8696 /*
8697 * [ WFC: Element Type Match ]
8698 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008699 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008700 *
8701 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008702 if (name != (xmlChar*)1) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008703 if (name == NULL) name = BAD_CAST "unparsable";
Daniel Veillardf403d292003-10-05 13:51:35 +00008704 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008705 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008706 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008707 }
8708
8709 /*
8710 * SAX: End of Tag
8711 */
8712 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8713 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008714 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008715
Daniel Veillarde57ec792003-09-10 10:50:59 +00008716 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008717 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008718 return;
8719}
8720
8721/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008722 * xmlParseEndTag:
8723 * @ctxt: an XML parser context
8724 *
8725 * parse an end of tag
8726 *
8727 * [42] ETag ::= '</' Name S? '>'
8728 *
8729 * With namespace
8730 *
8731 * [NS 9] ETag ::= '</' QName S? '>'
8732 */
8733
8734void
8735xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008736 xmlParseEndTag1(ctxt, 0);
8737}
Daniel Veillard81273902003-09-30 00:43:48 +00008738#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008739
8740/************************************************************************
8741 * *
8742 * SAX 2 specific operations *
8743 * *
8744 ************************************************************************/
8745
Daniel Veillard0fb18932003-09-07 09:14:37 +00008746/*
8747 * xmlGetNamespace:
8748 * @ctxt: an XML parser context
8749 * @prefix: the prefix to lookup
8750 *
8751 * Lookup the namespace name for the @prefix (which ca be NULL)
Jan Pokornýbb654fe2016-04-13 16:56:07 +02008752 * The prefix must come from the @ctxt->dict dictionary
Daniel Veillard0fb18932003-09-07 09:14:37 +00008753 *
8754 * Returns the namespace name or NULL if not bound
8755 */
8756static const xmlChar *
8757xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8758 int i;
8759
Daniel Veillarde57ec792003-09-10 10:50:59 +00008760 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008761 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008762 if (ctxt->nsTab[i] == prefix) {
8763 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8764 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008765 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008766 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008767 return(NULL);
8768}
8769
8770/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008771 * xmlParseQName:
8772 * @ctxt: an XML parser context
8773 * @prefix: pointer to store the prefix part
8774 *
8775 * parse an XML Namespace QName
8776 *
8777 * [6] QName ::= (Prefix ':')? LocalPart
8778 * [7] Prefix ::= NCName
8779 * [8] LocalPart ::= NCName
8780 *
8781 * Returns the Name parsed or NULL
8782 */
8783
8784static const xmlChar *
8785xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8786 const xmlChar *l, *p;
8787
8788 GROW;
8789
8790 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008791 if (l == NULL) {
8792 if (CUR == ':') {
8793 l = xmlParseName(ctxt);
8794 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008795 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008796 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008797 *prefix = NULL;
8798 return(l);
8799 }
8800 }
8801 return(NULL);
8802 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008803 if (CUR == ':') {
8804 NEXT;
8805 p = l;
8806 l = xmlParseNCName(ctxt);
8807 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008808 xmlChar *tmp;
8809
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008810 if (ctxt->instate == XML_PARSER_EOF)
8811 return(NULL);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008812 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8813 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008814 l = xmlParseNmtoken(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008815 if (l == NULL) {
8816 if (ctxt->instate == XML_PARSER_EOF)
8817 return(NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008818 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008819 } else {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008820 tmp = xmlBuildQName(l, p, NULL, 0);
8821 xmlFree((char *)l);
8822 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008823 p = xmlDictLookup(ctxt->dict, tmp, -1);
8824 if (tmp != NULL) xmlFree(tmp);
8825 *prefix = NULL;
8826 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008827 }
8828 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008829 xmlChar *tmp;
8830
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008831 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8832 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008833 NEXT;
8834 tmp = (xmlChar *) xmlParseName(ctxt);
8835 if (tmp != NULL) {
8836 tmp = xmlBuildQName(tmp, l, NULL, 0);
8837 l = xmlDictLookup(ctxt->dict, tmp, -1);
8838 if (tmp != NULL) xmlFree(tmp);
8839 *prefix = p;
8840 return(l);
8841 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008842 if (ctxt->instate == XML_PARSER_EOF)
8843 return(NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008844 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8845 l = xmlDictLookup(ctxt->dict, tmp, -1);
8846 if (tmp != NULL) xmlFree(tmp);
8847 *prefix = p;
8848 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008849 }
8850 *prefix = p;
8851 } else
8852 *prefix = NULL;
8853 return(l);
8854}
8855
8856/**
8857 * xmlParseQNameAndCompare:
8858 * @ctxt: an XML parser context
8859 * @name: the localname
8860 * @prefix: the prefix, if any.
8861 *
8862 * parse an XML name and compares for match
8863 * (specialized for endtag parsing)
8864 *
8865 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8866 * and the name for mismatch
8867 */
8868
8869static const xmlChar *
8870xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8871 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008872 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008873 const xmlChar *in;
8874 const xmlChar *ret;
8875 const xmlChar *prefix2;
8876
8877 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8878
8879 GROW;
8880 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008881
Daniel Veillard0fb18932003-09-07 09:14:37 +00008882 cmp = prefix;
8883 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008884 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008885 ++cmp;
8886 }
8887 if ((*cmp == 0) && (*in == ':')) {
8888 in++;
8889 cmp = name;
8890 while (*in != 0 && *in == *cmp) {
8891 ++in;
8892 ++cmp;
8893 }
William M. Brack76e95df2003-10-18 16:20:14 +00008894 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008895 /* success */
Haibo Huangf0a546b2020-09-01 20:28:19 -07008896 ctxt->input->col += in - ctxt->input->cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008897 ctxt->input->cur = in;
8898 return((const xmlChar*) 1);
8899 }
8900 }
8901 /*
8902 * all strings coms from the dictionary, equality can be done directly
8903 */
8904 ret = xmlParseQName (ctxt, &prefix2);
8905 if ((ret == name) && (prefix == prefix2))
8906 return((const xmlChar*) 1);
8907 return ret;
8908}
8909
8910/**
8911 * xmlParseAttValueInternal:
8912 * @ctxt: an XML parser context
8913 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008914 * @alloc: whether the attribute was reallocated as a new string
8915 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008916 *
8917 * parse a value for an attribute.
8918 * NOTE: if no normalization is needed, the routine will return pointers
8919 * directly from the data buffer.
8920 *
8921 * 3.3.3 Attribute-Value Normalization:
8922 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008923 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008924 * - a character reference is processed by appending the referenced
8925 * character to the attribute value
8926 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008927 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008928 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8929 * appending #x20 to the normalized value, except that only a single
8930 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008931 * parsed entity or the literal entity value of an internal parsed entity
8932 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008933 * If the declared value is not CDATA, then the XML processor must further
8934 * process the normalized attribute value by discarding any leading and
8935 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008936 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008937 * All attributes for which no declaration has been read should be treated
8938 * by a non-validating parser as if declared CDATA.
8939 *
8940 * Returns the AttValue parsed or NULL. The value has to be freed by the
8941 * caller if it was copied, this can be detected by val[*len] == 0.
8942 */
8943
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008944#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8945 const xmlChar *oldbase = ctxt->input->base;\
8946 GROW;\
8947 if (ctxt->instate == XML_PARSER_EOF)\
8948 return(NULL);\
8949 if (oldbase != ctxt->input->base) {\
8950 ptrdiff_t delta = ctxt->input->base - oldbase;\
8951 start = start + delta;\
8952 in = in + delta;\
8953 }\
8954 end = ctxt->input->end;
8955
Daniel Veillard0fb18932003-09-07 09:14:37 +00008956static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008957xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8958 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008959{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008960 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008961 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008962 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08008963 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008964
8965 GROW;
8966 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08008967 line = ctxt->input->line;
8968 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008969 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008970 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008971 return (NULL);
8972 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008973 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008974
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008975 /*
8976 * try to handle in this routine the most common case where no
8977 * allocation of a new string is required and where content is
8978 * pure ASCII.
8979 */
8980 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008981 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008982 end = ctxt->input->end;
8983 start = in;
8984 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008985 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Daniel Veillard0fb18932003-09-07 09:14:37 +00008986 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008987 if (normalize) {
8988 /*
8989 * Skip any leading spaces
8990 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008991 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008992 ((*in == 0x20) || (*in == 0x9) ||
8993 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008994 if (*in == 0xA) {
8995 line++; col = 1;
8996 } else {
8997 col++;
8998 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008999 in++;
9000 start = in;
9001 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009002 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Daniel Veillarde17db992012-07-19 11:25:16 +08009003 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9004 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9005 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009006 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009007 return(NULL);
9008 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009009 }
9010 }
9011 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9012 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009013 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009014 if ((*in++ == 0x20) && (*in == 0x20)) break;
9015 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009016 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Daniel Veillarde17db992012-07-19 11:25:16 +08009017 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9018 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9019 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009020 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009021 return(NULL);
9022 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009023 }
9024 }
9025 last = in;
9026 /*
9027 * skip the trailing blanks
9028 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00009029 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009030 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009031 ((*in == 0x20) || (*in == 0x9) ||
9032 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009033 if (*in == 0xA) {
9034 line++, col = 1;
9035 } else {
9036 col++;
9037 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009038 in++;
9039 if (in >= end) {
9040 const xmlChar *oldbase = ctxt->input->base;
9041 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009042 if (ctxt->instate == XML_PARSER_EOF)
9043 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009044 if (oldbase != ctxt->input->base) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009045 ptrdiff_t delta = ctxt->input->base - oldbase;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009046 start = start + delta;
9047 in = in + delta;
9048 last = last + delta;
9049 }
9050 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009051 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9052 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9053 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009054 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009055 return(NULL);
9056 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009057 }
9058 }
Daniel Veillarde17db992012-07-19 11:25:16 +08009059 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9060 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9061 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009062 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009063 return(NULL);
9064 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009065 if (*in != limit) goto need_complex;
9066 } else {
9067 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9068 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9069 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009070 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009071 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009072 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Daniel Veillarde17db992012-07-19 11:25:16 +08009073 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9074 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9075 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009076 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009077 return(NULL);
9078 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009079 }
9080 }
9081 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009082 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9083 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9084 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009085 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009086 return(NULL);
9087 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009088 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009089 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009090 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009091 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009092 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009093 *len = last - start;
9094 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009095 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009096 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009097 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009098 }
9099 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009100 ctxt->input->line = line;
9101 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009102 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009103 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009104need_complex:
9105 if (alloc) *alloc = 1;
9106 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009107}
9108
9109/**
9110 * xmlParseAttribute2:
9111 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009112 * @pref: the element prefix
9113 * @elem: the element name
9114 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009115 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009116 * @len: an int * to save the length of the attribute
9117 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009118 *
9119 * parse an attribute in the new SAX2 framework.
9120 *
9121 * Returns the attribute name, and the value in *value, .
9122 */
9123
9124static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009125xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009126 const xmlChar * pref, const xmlChar * elem,
9127 const xmlChar ** prefix, xmlChar ** value,
9128 int *len, int *alloc)
9129{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009130 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009131 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009132 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009133
9134 *value = NULL;
9135 GROW;
9136 name = xmlParseQName(ctxt, prefix);
9137 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009138 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9139 "error parsing attribute name\n");
9140 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009141 }
9142
9143 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009144 * get the type if needed
9145 */
9146 if (ctxt->attsSpecial != NULL) {
9147 int type;
9148
Nick Wellnhoferd422b952017-10-09 13:37:42 +02009149 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9150 pref, elem, *prefix, name);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009151 if (type != 0)
9152 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009153 }
9154
9155 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009156 * read the value
9157 */
9158 SKIP_BLANKS;
9159 if (RAW == '=') {
9160 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009161 SKIP_BLANKS;
9162 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9163 if (normalize) {
9164 /*
9165 * Sometimes a second normalisation pass for spaces is needed
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009166 * but that only happens if charrefs or entities references
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009167 * have been used in the attribute value, i.e. the attribute
9168 * value have been extracted in an allocated string already.
9169 */
9170 if (*alloc) {
9171 const xmlChar *val2;
9172
9173 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009174 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009175 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009176 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009177 }
9178 }
9179 }
9180 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009181 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009182 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02009183 "Specification mandates value for attribute %s\n",
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009184 name);
9185 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009186 }
9187
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009188 if (*prefix == ctxt->str_xml) {
9189 /*
9190 * Check that xml:lang conforms to the specification
9191 * No more registered as an error, just generate a warning now
9192 * since this was deprecated in XML second edition
9193 */
9194 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9195 internal_val = xmlStrndup(val, *len);
9196 if (!xmlCheckLanguageID(internal_val)) {
9197 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9198 "Malformed value for xml:lang : %s\n",
9199 internal_val, NULL);
9200 }
9201 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009202
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009203 /*
9204 * Check that xml:space conforms to the specification
9205 */
9206 if (xmlStrEqual(name, BAD_CAST "space")) {
9207 internal_val = xmlStrndup(val, *len);
9208 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9209 *(ctxt->space) = 0;
9210 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9211 *(ctxt->space) = 1;
9212 else {
9213 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9214 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9215 internal_val, NULL);
9216 }
9217 }
9218 if (internal_val) {
9219 xmlFree(internal_val);
9220 }
9221 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009222
9223 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009224 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009225}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009226/**
9227 * xmlParseStartTag2:
9228 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009229 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009230 * parse a start of tag either for rule element or
9231 * EmptyElement. In both case we don't parse the tag closing chars.
9232 * This routine is called when running SAX2 parsing
9233 *
9234 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9235 *
9236 * [ WFC: Unique Att Spec ]
9237 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009238 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009239 *
9240 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9241 *
9242 * [ WFC: Unique Att Spec ]
9243 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009244 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009245 *
9246 * With namespace:
9247 *
9248 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9249 *
9250 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9251 *
9252 * Returns the element name parsed
9253 */
9254
9255static const xmlChar *
9256xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009257 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009258 const xmlChar *localname;
9259 const xmlChar *prefix;
9260 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009261 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009262 const xmlChar *nsname;
9263 xmlChar *attvalue;
9264 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009265 int maxatts = ctxt->maxatts;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009266 int nratts, nbatts, nbdef, inputid;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009267 int i, j, nbNs, attval;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009268 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009269 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009270
9271 if (RAW != '<') return(NULL);
9272 NEXT1;
9273
9274 /*
9275 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9276 * point since the attribute values may be stored as pointers to
9277 * the buffer and calling SHRINK would destroy them !
9278 * The Shrinking is only possible once the full set of attribute
9279 * callbacks have been done.
9280 */
9281 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009282 cur = ctxt->input->cur - ctxt->input->base;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009283 inputid = ctxt->input->id;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009284 nbatts = 0;
9285 nratts = 0;
9286 nbdef = 0;
9287 nbNs = 0;
9288 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009289 /* Forget any namespaces added during an earlier parse of this element. */
9290 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009291
9292 localname = xmlParseQName(ctxt, &prefix);
9293 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009294 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9295 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009296 return(NULL);
9297 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009298 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009299
9300 /*
9301 * Now parse the attributes, it ends up with the ending
9302 *
9303 * (S Attribute)* S?
9304 */
9305 SKIP_BLANKS;
9306 GROW;
9307
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009308 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009309 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009310 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009311 const xmlChar *q = CUR_PTR;
9312 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009313 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009314
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009315 attname = xmlParseAttribute2(ctxt, prefix, localname,
9316 &aprefix, &attvalue, &len, &alloc);
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009317 if ((attname == NULL) || (attvalue == NULL))
9318 goto next_attr;
9319 if (len < 0) len = xmlStrlen(attvalue);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009320
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009321 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9322 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9323 xmlURIPtr uri;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009324
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009325 if (URL == NULL) {
9326 xmlErrMemory(ctxt, "dictionary allocation failure");
9327 if ((attvalue != NULL) && (alloc != 0))
9328 xmlFree(attvalue);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009329 localname = NULL;
9330 goto done;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009331 }
9332 if (*URL != 0) {
9333 uri = xmlParseURI((const char *) URL);
9334 if (uri == NULL) {
9335 xmlNsErr(ctxt, XML_WAR_NS_URI,
9336 "xmlns: '%s' is not a valid URI\n",
9337 URL, NULL, NULL);
9338 } else {
9339 if (uri->scheme == NULL) {
9340 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9341 "xmlns: URI %s is not absolute\n",
9342 URL, NULL, NULL);
9343 }
9344 xmlFreeURI(uri);
9345 }
Daniel Veillard37334572008-07-31 08:20:02 +00009346 if (URL == ctxt->str_xml_ns) {
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009347 if (attname != ctxt->str_xml) {
9348 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9349 "xml namespace URI cannot be the default namespace\n",
9350 NULL, NULL, NULL);
9351 }
9352 goto next_attr;
9353 }
9354 if ((len == 29) &&
9355 (xmlStrEqual(URL,
9356 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9357 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9358 "reuse of the xmlns namespace name is forbidden\n",
9359 NULL, NULL, NULL);
9360 goto next_attr;
9361 }
9362 }
9363 /*
9364 * check that it's not a defined namespace
9365 */
9366 for (j = 1;j <= nbNs;j++)
9367 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9368 break;
9369 if (j <= nbNs)
9370 xmlErrAttributeDup(ctxt, NULL, attname);
9371 else
9372 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009373
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009374 } else if (aprefix == ctxt->str_xmlns) {
9375 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9376 xmlURIPtr uri;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009377
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009378 if (attname == ctxt->str_xml) {
9379 if (URL != ctxt->str_xml_ns) {
9380 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9381 "xml namespace prefix mapped to wrong URI\n",
9382 NULL, NULL, NULL);
9383 }
9384 /*
9385 * Do not keep a namespace definition node
9386 */
9387 goto next_attr;
9388 }
9389 if (URL == ctxt->str_xml_ns) {
9390 if (attname != ctxt->str_xml) {
9391 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9392 "xml namespace URI mapped to wrong prefix\n",
9393 NULL, NULL, NULL);
9394 }
9395 goto next_attr;
9396 }
9397 if (attname == ctxt->str_xmlns) {
9398 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9399 "redefinition of the xmlns prefix is forbidden\n",
9400 NULL, NULL, NULL);
9401 goto next_attr;
9402 }
9403 if ((len == 29) &&
9404 (xmlStrEqual(URL,
9405 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9406 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9407 "reuse of the xmlns namespace name is forbidden\n",
9408 NULL, NULL, NULL);
9409 goto next_attr;
9410 }
9411 if ((URL == NULL) || (URL[0] == 0)) {
9412 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9413 "xmlns:%s: Empty XML namespace is not allowed\n",
9414 attname, NULL, NULL);
9415 goto next_attr;
9416 } else {
9417 uri = xmlParseURI((const char *) URL);
9418 if (uri == NULL) {
9419 xmlNsErr(ctxt, XML_WAR_NS_URI,
9420 "xmlns:%s: '%s' is not a valid URI\n",
9421 attname, URL, NULL);
9422 } else {
9423 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9424 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9425 "xmlns:%s: URI %s is not absolute\n",
9426 attname, URL, NULL);
9427 }
9428 xmlFreeURI(uri);
9429 }
9430 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009431
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009432 /*
9433 * check that it's not a defined namespace
9434 */
9435 for (j = 1;j <= nbNs;j++)
9436 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9437 break;
9438 if (j <= nbNs)
9439 xmlErrAttributeDup(ctxt, aprefix, attname);
9440 else
9441 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9442
9443 } else {
9444 /*
9445 * Add the pair to atts
9446 */
9447 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9448 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9449 goto next_attr;
9450 }
9451 maxatts = ctxt->maxatts;
9452 atts = ctxt->atts;
9453 }
9454 ctxt->attallocs[nratts++] = alloc;
9455 atts[nbatts++] = attname;
9456 atts[nbatts++] = aprefix;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009457 /*
9458 * The namespace URI field is used temporarily to point at the
9459 * base of the current input buffer for non-alloced attributes.
9460 * When the input buffer is reallocated, all the pointers become
9461 * invalid, but they can be reconstructed later.
9462 */
9463 if (alloc)
9464 atts[nbatts++] = NULL;
9465 else
9466 atts[nbatts++] = ctxt->input->base;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009467 atts[nbatts++] = attvalue;
9468 attvalue += len;
9469 atts[nbatts++] = attvalue;
9470 /*
9471 * tag if some deallocation is needed
9472 */
9473 if (alloc != 0) attval = 1;
9474 attvalue = NULL; /* moved into atts */
9475 }
9476
9477next_attr:
9478 if ((attvalue != NULL) && (alloc != 0)) {
9479 xmlFree(attvalue);
9480 attvalue = NULL;
9481 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009482
9483 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009484 if (ctxt->instate == XML_PARSER_EOF)
9485 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009486 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9487 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02009488 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009489 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9490 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009491 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009492 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009493 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9494 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009495 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009496 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009497 break;
9498 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009499 GROW;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009500 }
9501
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009502 if (ctxt->input->id != inputid) {
9503 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9504 "Unexpected change of input\n");
9505 localname = NULL;
9506 goto done;
9507 }
9508
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009509 /* Reconstruct attribute value pointers. */
9510 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9511 if (atts[i+2] != NULL) {
9512 /*
9513 * Arithmetic on dangling pointers is technically undefined
9514 * behavior, but well...
9515 */
9516 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9517 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9518 atts[i+3] += offset; /* value */
9519 atts[i+4] += offset; /* valuend */
9520 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009521 }
9522
Daniel Veillard0fb18932003-09-07 09:14:37 +00009523 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009524 * The attributes defaulting
9525 */
9526 if (ctxt->attsDefault != NULL) {
9527 xmlDefAttrsPtr defaults;
9528
9529 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9530 if (defaults != NULL) {
9531 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009532 attname = defaults->values[5 * i];
9533 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009534
9535 /*
9536 * special work for namespaces defaulted defs
9537 */
9538 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9539 /*
9540 * check that it's not a defined namespace
9541 */
9542 for (j = 1;j <= nbNs;j++)
9543 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9544 break;
9545 if (j <= nbNs) continue;
9546
9547 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009548 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009549 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009550 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009551 nbNs++;
9552 }
9553 } else if (aprefix == ctxt->str_xmlns) {
9554 /*
9555 * check that it's not a defined namespace
9556 */
9557 for (j = 1;j <= nbNs;j++)
9558 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9559 break;
9560 if (j <= nbNs) continue;
9561
9562 nsname = xmlGetNamespace(ctxt, attname);
9563 if (nsname != defaults->values[2]) {
9564 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009565 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009566 nbNs++;
9567 }
9568 } else {
9569 /*
9570 * check that it's not a defined attribute
9571 */
9572 for (j = 0;j < nbatts;j+=5) {
9573 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9574 break;
9575 }
9576 if (j < nbatts) continue;
9577
9578 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9579 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009580 localname = NULL;
9581 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009582 }
9583 maxatts = ctxt->maxatts;
9584 atts = ctxt->atts;
9585 }
9586 atts[nbatts++] = attname;
9587 atts[nbatts++] = aprefix;
9588 if (aprefix == NULL)
9589 atts[nbatts++] = NULL;
9590 else
9591 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009592 atts[nbatts++] = defaults->values[5 * i + 2];
9593 atts[nbatts++] = defaults->values[5 * i + 3];
9594 if ((ctxt->standalone == 1) &&
9595 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009596 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009597 "standalone: attribute %s on %s defaulted from external subset\n",
9598 attname, localname);
9599 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009600 nbdef++;
9601 }
9602 }
9603 }
9604 }
9605
Daniel Veillarde70c8772003-11-25 07:21:18 +00009606 /*
9607 * The attributes checkings
9608 */
9609 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009610 /*
9611 * The default namespace does not apply to attribute names.
9612 */
9613 if (atts[i + 1] != NULL) {
9614 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9615 if (nsname == NULL) {
9616 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9617 "Namespace prefix %s for %s on %s is not defined\n",
9618 atts[i + 1], atts[i], localname);
9619 }
9620 atts[i + 2] = nsname;
9621 } else
9622 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009623 /*
9624 * [ WFC: Unique Att Spec ]
9625 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009626 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009627 * As extended by the Namespace in XML REC.
9628 */
9629 for (j = 0; j < i;j += 5) {
9630 if (atts[i] == atts[j]) {
9631 if (atts[i+1] == atts[j+1]) {
9632 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9633 break;
9634 }
9635 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9636 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9637 "Namespaced Attribute %s in '%s' redefined\n",
9638 atts[i], nsname, NULL);
9639 break;
9640 }
9641 }
9642 }
9643 }
9644
Daniel Veillarde57ec792003-09-10 10:50:59 +00009645 nsname = xmlGetNamespace(ctxt, prefix);
9646 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009647 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9648 "Namespace prefix %s on %s is not defined\n",
9649 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009650 }
9651 *pref = prefix;
9652 *URI = nsname;
9653
9654 /*
9655 * SAX: Start of Element !
9656 */
9657 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9658 (!ctxt->disableSAX)) {
9659 if (nbNs > 0)
9660 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9661 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9662 nbatts / 5, nbdef, atts);
9663 else
9664 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9665 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9666 }
9667
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009668done:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009669 /*
9670 * Free up attribute allocated strings if needed
9671 */
9672 if (attval != 0) {
9673 for (i = 3,j = 0; j < nratts;i += 5,j++)
9674 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9675 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009676 }
9677
9678 return(localname);
9679}
9680
9681/**
9682 * xmlParseEndTag2:
9683 * @ctxt: an XML parser context
9684 * @line: line of the start tag
9685 * @nsNr: number of namespaces on the start tag
9686 *
9687 * parse an end of tag
9688 *
9689 * [42] ETag ::= '</' Name S? '>'
9690 *
9691 * With namespace
9692 *
9693 * [NS 9] ETag ::= '</' QName S? '>'
9694 */
9695
9696static void
Elliott Hughese54f00d2021-05-13 08:13:46 -07009697xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009698 const xmlChar *name;
9699
9700 GROW;
9701 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009702 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009703 return;
9704 }
9705 SKIP(2);
9706
Elliott Hughese54f00d2021-05-13 08:13:46 -07009707 if (tag->prefix == NULL)
9708 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9709 else
9710 name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009711
9712 /*
9713 * We should definitely be at the ending "S? '>'" part
9714 */
9715 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009716 if (ctxt->instate == XML_PARSER_EOF)
9717 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009718 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009719 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009720 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009721 } else
9722 NEXT1;
9723
9724 /*
9725 * [ WFC: Element Type Match ]
9726 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009727 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009728 *
9729 */
9730 if (name != (xmlChar*)1) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009731 if (name == NULL) name = BAD_CAST "unparsable";
Daniel Veillardf403d292003-10-05 13:51:35 +00009732 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009733 "Opening and ending tag mismatch: %s line %d and %s\n",
Elliott Hughese54f00d2021-05-13 08:13:46 -07009734 ctxt->name, tag->line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009735 }
9736
9737 /*
9738 * SAX: End of Tag
9739 */
9740 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9741 (!ctxt->disableSAX))
Elliott Hughese54f00d2021-05-13 08:13:46 -07009742 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9743 tag->URI);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009744
Daniel Veillard0fb18932003-09-07 09:14:37 +00009745 spacePop(ctxt);
Elliott Hughese54f00d2021-05-13 08:13:46 -07009746 if (tag->nsNr != 0)
9747 nsPop(ctxt, tag->nsNr);
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009748}
9749
9750/**
Owen Taylor3473f882001-02-23 17:55:21 +00009751 * xmlParseCDSect:
9752 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009753 *
Owen Taylor3473f882001-02-23 17:55:21 +00009754 * Parse escaped pure raw content.
9755 *
9756 * [18] CDSect ::= CDStart CData CDEnd
9757 *
9758 * [19] CDStart ::= '<![CDATA['
9759 *
9760 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9761 *
9762 * [21] CDEnd ::= ']]>'
9763 */
9764void
9765xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9766 xmlChar *buf = NULL;
9767 int len = 0;
9768 int size = XML_PARSER_BUFFER_SIZE;
9769 int r, rl;
9770 int s, sl;
9771 int cur, l;
9772 int count = 0;
9773
Daniel Veillard8f597c32003-10-06 08:19:27 +00009774 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009775 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009776 SKIP(9);
9777 } else
9778 return;
9779
9780 ctxt->instate = XML_PARSER_CDATA_SECTION;
9781 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009782 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009783 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009784 ctxt->instate = XML_PARSER_CONTENT;
9785 return;
9786 }
9787 NEXTL(rl);
9788 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009789 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009790 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009791 ctxt->instate = XML_PARSER_CONTENT;
9792 return;
9793 }
9794 NEXTL(sl);
9795 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009796 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009797 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009798 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009799 return;
9800 }
William M. Brack871611b2003-10-18 04:53:14 +00009801 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009802 ((r != ']') || (s != ']') || (cur != '>'))) {
9803 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009804 xmlChar *tmp;
9805
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009806 if ((size > XML_MAX_TEXT_LENGTH) &&
9807 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9808 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9809 "CData section too big found", NULL);
9810 xmlFree (buf);
9811 return;
9812 }
9813 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009814 if (tmp == NULL) {
9815 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009816 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009817 return;
9818 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009819 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009820 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009821 }
9822 COPY_BUF(rl,buf,len,r);
9823 r = s;
9824 rl = sl;
9825 s = cur;
9826 sl = l;
9827 count++;
9828 if (count > 50) {
Haibo Huangca689272021-02-09 16:43:43 -08009829 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00009830 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009831 if (ctxt->instate == XML_PARSER_EOF) {
9832 xmlFree(buf);
9833 return;
9834 }
Owen Taylor3473f882001-02-23 17:55:21 +00009835 count = 0;
9836 }
9837 NEXTL(l);
9838 cur = CUR_CHAR(l);
9839 }
9840 buf[len] = 0;
9841 ctxt->instate = XML_PARSER_CONTENT;
9842 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009843 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009844 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009845 xmlFree(buf);
9846 return;
9847 }
9848 NEXTL(l);
9849
9850 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009851 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009852 */
9853 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9854 if (ctxt->sax->cdataBlock != NULL)
9855 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009856 else if (ctxt->sax->characters != NULL)
9857 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009858 }
9859 xmlFree(buf);
9860}
9861
9862/**
Elliott Hughese54f00d2021-05-13 08:13:46 -07009863 * xmlParseContentInternal:
Owen Taylor3473f882001-02-23 17:55:21 +00009864 * @ctxt: an XML parser context
9865 *
Elliott Hughese54f00d2021-05-13 08:13:46 -07009866 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9867 * unexpected EOF to the caller.
Owen Taylor3473f882001-02-23 17:55:21 +00009868 */
9869
Elliott Hughese54f00d2021-05-13 08:13:46 -07009870static void
9871xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009872 int nameNr = ctxt->nameNr;
9873
Owen Taylor3473f882001-02-23 17:55:21 +00009874 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009875 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009876 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009877 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009878 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009879 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009880
9881 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009882 * First case : a Processing Instruction.
9883 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009884 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009885 xmlParsePI(ctxt);
9886 }
9887
9888 /*
9889 * Second case : a CDSection
9890 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009891 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009892 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009893 xmlParseCDSect(ctxt);
9894 }
9895
9896 /*
9897 * Third case : a comment
9898 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009899 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009900 (NXT(2) == '-') && (NXT(3) == '-')) {
9901 xmlParseComment(ctxt);
9902 ctxt->instate = XML_PARSER_CONTENT;
9903 }
9904
9905 /*
9906 * Fourth case : a sub-element.
9907 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009908 else if (*cur == '<') {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009909 if (NXT(1) == '/') {
9910 if (ctxt->nameNr <= nameNr)
9911 break;
9912 xmlParseElementEnd(ctxt);
9913 } else {
9914 xmlParseElementStart(ctxt);
9915 }
Owen Taylor3473f882001-02-23 17:55:21 +00009916 }
9917
9918 /*
9919 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009920 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009921 */
9922
Daniel Veillard21a0f912001-02-25 19:54:14 +00009923 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009924 xmlParseReference(ctxt);
9925 }
9926
9927 /*
9928 * Last case, text. Note that References are handled directly.
9929 */
9930 else {
9931 xmlParseCharData(ctxt, 0);
9932 }
9933
9934 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00009935 SHRINK;
9936
Daniel Veillardfdc91562002-07-01 21:52:03 +00009937 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009938 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9939 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +08009940 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009941 break;
9942 }
9943 }
9944}
9945
9946/**
Elliott Hughese54f00d2021-05-13 08:13:46 -07009947 * xmlParseContent:
9948 * @ctxt: an XML parser context
9949 *
9950 * Parse a content sequence. Stops at EOF or '</'.
9951 *
9952 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9953 */
9954
9955void
9956xmlParseContent(xmlParserCtxtPtr ctxt) {
9957 int nameNr = ctxt->nameNr;
9958
9959 xmlParseContentInternal(ctxt);
9960
9961 if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9962 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9963 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9964 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9965 "Premature end of data in tag %s line %d\n",
9966 name, line, NULL);
9967 }
9968}
9969
9970/**
Owen Taylor3473f882001-02-23 17:55:21 +00009971 * xmlParseElement:
9972 * @ctxt: an XML parser context
9973 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009974 * parse an XML element
Owen Taylor3473f882001-02-23 17:55:21 +00009975 *
9976 * [39] element ::= EmptyElemTag | STag content ETag
9977 *
9978 * [ WFC: Element Type Match ]
9979 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009980 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009981 *
Owen Taylor3473f882001-02-23 17:55:21 +00009982 */
9983
9984void
9985xmlParseElement(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009986 if (xmlParseElementStart(ctxt) != 0)
9987 return;
Elliott Hughese54f00d2021-05-13 08:13:46 -07009988
9989 xmlParseContentInternal(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009990 if (ctxt->instate == XML_PARSER_EOF)
9991 return;
Elliott Hughese54f00d2021-05-13 08:13:46 -07009992
9993 if (CUR == 0) {
9994 const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9995 int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9996 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9997 "Premature end of data in tag %s line %d\n",
9998 name, line, NULL);
9999 return;
10000 }
10001
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010002 xmlParseElementEnd(ctxt);
10003}
10004
10005/**
10006 * xmlParseElementStart:
10007 * @ctxt: an XML parser context
10008 *
10009 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10010 * opening tag was parsed, 1 if an empty element was parsed.
10011 */
10012static int
10013xmlParseElementStart(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +000010014 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010015 const xmlChar *prefix = NULL;
10016 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010017 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +080010018 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010019 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +000010020 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +000010021
Daniel Veillard8915c152008-08-26 13:05:34 +000010022 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10023 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10024 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10025 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10026 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +080010027 xmlHaltParser(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010028 return(-1);
Daniel Veillard4a9fe382006-09-19 12:44:35 +000010029 }
10030
Owen Taylor3473f882001-02-23 17:55:21 +000010031 /* Capture start position */
10032 if (ctxt->record_info) {
10033 node_info.begin_pos = ctxt->input->consumed +
10034 (CUR_PTR - ctxt->input->base);
10035 node_info.begin_line = ctxt->input->line;
10036 }
10037
10038 if (ctxt->spaceNr == 0)
10039 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010040 else if (*ctxt->space == -2)
10041 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +000010042 else
10043 spacePush(ctxt, *ctxt->space);
10044
Daniel Veillard6c5b2d32003-03-27 14:55:52 +000010045 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +000010046#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010047 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010048#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010049 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010050#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010051 else
10052 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010053#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080010054 if (ctxt->instate == XML_PARSER_EOF)
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010055 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010056 if (name == NULL) {
10057 spacePop(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010058 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010059 }
Elliott Hughese54f00d2021-05-13 08:13:46 -070010060 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010061 ret = ctxt->node;
10062
Daniel Veillard4432df22003-09-28 18:58:27 +000010063#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010064 /*
10065 * [ VC: Root Element Type ]
10066 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010067 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +000010068 */
10069 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10070 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10071 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010072#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010073
10074 /*
10075 * Check for an Empty Element.
10076 */
10077 if ((RAW == '/') && (NXT(1) == '>')) {
10078 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010079 if (ctxt->sax2) {
10080 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10081 (!ctxt->disableSAX))
10082 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010083#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010084 } else {
10085 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10086 (!ctxt->disableSAX))
10087 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010088#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010089 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010090 namePop(ctxt);
10091 spacePop(ctxt);
10092 if (nsNr != ctxt->nsNr)
10093 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010094 if ( ret != NULL && ctxt->record_info ) {
10095 node_info.end_pos = ctxt->input->consumed +
10096 (CUR_PTR - ctxt->input->base);
10097 node_info.end_line = ctxt->input->line;
10098 node_info.node = ret;
10099 xmlParserAddNodeInfo(ctxt, &node_info);
10100 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010101 return(1);
Owen Taylor3473f882001-02-23 17:55:21 +000010102 }
10103 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010104 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010105 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010106 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10107 "Couldn't find end of Start Tag %s line %d\n",
10108 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010109
10110 /*
10111 * end of parsing of this node.
10112 */
10113 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010114 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010115 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010116 if (nsNr != ctxt->nsNr)
10117 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010118
10119 /*
10120 * Capture end position and add node
10121 */
10122 if ( ret != NULL && ctxt->record_info ) {
10123 node_info.end_pos = ctxt->input->consumed +
10124 (CUR_PTR - ctxt->input->base);
10125 node_info.end_line = ctxt->input->line;
10126 node_info.node = ret;
10127 xmlParserAddNodeInfo(ctxt, &node_info);
10128 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010129 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010130 }
10131
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010132 return(0);
10133}
Owen Taylor3473f882001-02-23 17:55:21 +000010134
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010135/**
10136 * xmlParseElementEnd:
10137 * @ctxt: an XML parser context
10138 *
10139 * Parse the end of an XML element.
10140 */
10141static void
10142xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10143 xmlParserNodeInfo node_info;
10144 xmlNodePtr ret = ctxt->node;
10145
10146 if (ctxt->nameNr <= 0)
10147 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010148
10149 /*
10150 * parse the end of tag: '</' should be here.
10151 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010152 if (ctxt->sax2) {
Elliott Hughese54f00d2021-05-13 08:13:46 -070010153 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010154 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010155 }
10156#ifdef LIBXML_SAX1_ENABLED
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010157 else
10158 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010159#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010160
10161 /*
10162 * Capture end position and add node
10163 */
10164 if ( ret != NULL && ctxt->record_info ) {
10165 node_info.end_pos = ctxt->input->consumed +
10166 (CUR_PTR - ctxt->input->base);
10167 node_info.end_line = ctxt->input->line;
10168 node_info.node = ret;
10169 xmlParserAddNodeInfo(ctxt, &node_info);
10170 }
10171}
10172
10173/**
10174 * xmlParseVersionNum:
10175 * @ctxt: an XML parser context
10176 *
10177 * parse the XML version value.
10178 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010179 * [26] VersionNum ::= '1.' [0-9]+
10180 *
10181 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010182 *
10183 * Returns the string giving the XML version number, or NULL
10184 */
10185xmlChar *
10186xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10187 xmlChar *buf = NULL;
10188 int len = 0;
10189 int size = 10;
10190 xmlChar cur;
10191
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010192 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010193 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010194 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010195 return(NULL);
10196 }
10197 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010198 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010199 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010200 return(NULL);
10201 }
10202 buf[len++] = cur;
10203 NEXT;
10204 cur=CUR;
10205 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010206 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010207 return(NULL);
10208 }
10209 buf[len++] = cur;
10210 NEXT;
10211 cur=CUR;
10212 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010213 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010214 xmlChar *tmp;
10215
Owen Taylor3473f882001-02-23 17:55:21 +000010216 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010217 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10218 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010219 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010220 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010221 return(NULL);
10222 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010223 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010224 }
10225 buf[len++] = cur;
10226 NEXT;
10227 cur=CUR;
10228 }
10229 buf[len] = 0;
10230 return(buf);
10231}
10232
10233/**
10234 * xmlParseVersionInfo:
10235 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010236 *
Owen Taylor3473f882001-02-23 17:55:21 +000010237 * parse the XML version.
10238 *
10239 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010240 *
Owen Taylor3473f882001-02-23 17:55:21 +000010241 * [25] Eq ::= S? '=' S?
10242 *
10243 * Returns the version string, e.g. "1.0"
10244 */
10245
10246xmlChar *
10247xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10248 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010249
Daniel Veillarda07050d2003-10-19 14:46:32 +000010250 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010251 SKIP(7);
10252 SKIP_BLANKS;
10253 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010254 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010255 return(NULL);
10256 }
10257 NEXT;
10258 SKIP_BLANKS;
10259 if (RAW == '"') {
10260 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010261 version = xmlParseVersionNum(ctxt);
10262 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010263 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010264 } else
10265 NEXT;
10266 } else if (RAW == '\''){
10267 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010268 version = xmlParseVersionNum(ctxt);
10269 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010270 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010271 } else
10272 NEXT;
10273 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010274 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010275 }
10276 }
10277 return(version);
10278}
10279
10280/**
10281 * xmlParseEncName:
10282 * @ctxt: an XML parser context
10283 *
10284 * parse the XML encoding name
10285 *
10286 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10287 *
10288 * Returns the encoding name value or NULL
10289 */
10290xmlChar *
10291xmlParseEncName(xmlParserCtxtPtr ctxt) {
10292 xmlChar *buf = NULL;
10293 int len = 0;
10294 int size = 10;
10295 xmlChar cur;
10296
10297 cur = CUR;
10298 if (((cur >= 'a') && (cur <= 'z')) ||
10299 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010300 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010301 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010302 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010303 return(NULL);
10304 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010305
Owen Taylor3473f882001-02-23 17:55:21 +000010306 buf[len++] = cur;
10307 NEXT;
10308 cur = CUR;
10309 while (((cur >= 'a') && (cur <= 'z')) ||
10310 ((cur >= 'A') && (cur <= 'Z')) ||
10311 ((cur >= '0') && (cur <= '9')) ||
10312 (cur == '.') || (cur == '_') ||
10313 (cur == '-')) {
10314 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010315 xmlChar *tmp;
10316
Owen Taylor3473f882001-02-23 17:55:21 +000010317 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010318 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10319 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010320 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010321 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010322 return(NULL);
10323 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010324 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010325 }
10326 buf[len++] = cur;
10327 NEXT;
10328 cur = CUR;
10329 if (cur == 0) {
10330 SHRINK;
10331 GROW;
10332 cur = CUR;
10333 }
10334 }
10335 buf[len] = 0;
10336 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010337 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010338 }
10339 return(buf);
10340}
10341
10342/**
10343 * xmlParseEncodingDecl:
10344 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010345 *
Owen Taylor3473f882001-02-23 17:55:21 +000010346 * parse the XML encoding declaration
10347 *
10348 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10349 *
10350 * this setups the conversion filters.
10351 *
10352 * Returns the encoding value or NULL
10353 */
10354
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010355const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010356xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10357 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010358
10359 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010360 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010361 SKIP(8);
10362 SKIP_BLANKS;
10363 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010364 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010365 return(NULL);
10366 }
10367 NEXT;
10368 SKIP_BLANKS;
10369 if (RAW == '"') {
10370 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010371 encoding = xmlParseEncName(ctxt);
10372 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010373 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010374 xmlFree((xmlChar *) encoding);
10375 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010376 } else
10377 NEXT;
10378 } else if (RAW == '\''){
10379 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010380 encoding = xmlParseEncName(ctxt);
10381 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010382 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010383 xmlFree((xmlChar *) encoding);
10384 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010385 } else
10386 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010387 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010388 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010389 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010390
10391 /*
10392 * Non standard parsing, allowing the user to ignore encoding
10393 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010394 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10395 xmlFree((xmlChar *) encoding);
10396 return(NULL);
10397 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010398
Daniel Veillard6b621b82003-08-11 15:03:34 +000010399 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010400 * UTF-16 encoding switch has already taken place at this stage,
Daniel Veillard6b621b82003-08-11 15:03:34 +000010401 * more over the little-endian/big-endian selection is already done
10402 */
10403 if ((encoding != NULL) &&
10404 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10405 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010406 /*
10407 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010408 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010409 * document is apparently UTF-8 compatible, then raise an
10410 * encoding mismatch fatal error
10411 */
10412 if ((ctxt->encoding == NULL) &&
10413 (ctxt->input->buf != NULL) &&
10414 (ctxt->input->buf->encoder == NULL)) {
10415 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10416 "Document labelled UTF-16 but has UTF-8 content\n");
10417 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010418 if (ctxt->encoding != NULL)
10419 xmlFree((xmlChar *) ctxt->encoding);
10420 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010421 }
10422 /*
10423 * UTF-8 encoding is handled natively
10424 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010425 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010426 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10427 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010428 if (ctxt->encoding != NULL)
10429 xmlFree((xmlChar *) ctxt->encoding);
10430 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010431 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010432 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010433 xmlCharEncodingHandlerPtr handler;
10434
10435 if (ctxt->input->encoding != NULL)
10436 xmlFree((xmlChar *) ctxt->input->encoding);
10437 ctxt->input->encoding = encoding;
10438
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010439 handler = xmlFindCharEncodingHandler((const char *) encoding);
10440 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010441 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10442 /* failed to convert */
10443 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10444 return(NULL);
10445 }
Owen Taylor3473f882001-02-23 17:55:21 +000010446 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010447 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010448 "Unsupported encoding %s\n", encoding);
10449 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010450 }
10451 }
10452 }
10453 return(encoding);
10454}
10455
10456/**
10457 * xmlParseSDDecl:
10458 * @ctxt: an XML parser context
10459 *
10460 * parse the XML standalone declaration
10461 *
10462 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010463 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010464 *
10465 * [ VC: Standalone Document Declaration ]
10466 * TODO The standalone document declaration must have the value "no"
10467 * if any external markup declarations contain declarations of:
10468 * - attributes with default values, if elements to which these
10469 * attributes apply appear in the document without specifications
10470 * of values for these attributes, or
10471 * - entities (other than amp, lt, gt, apos, quot), if references
10472 * to those entities appear in the document, or
10473 * - attributes with values subject to normalization, where the
10474 * attribute appears in the document with a value which will change
10475 * as a result of normalization, or
10476 * - element types with element content, if white space occurs directly
10477 * within any instance of those types.
10478 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010479 * Returns:
10480 * 1 if standalone="yes"
10481 * 0 if standalone="no"
10482 * -2 if standalone attribute is missing or invalid
10483 * (A standalone value of -2 means that the XML declaration was found,
10484 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010485 */
10486
10487int
10488xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010489 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010490
10491 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010492 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010493 SKIP(10);
10494 SKIP_BLANKS;
10495 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010496 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010497 return(standalone);
10498 }
10499 NEXT;
10500 SKIP_BLANKS;
10501 if (RAW == '\''){
10502 NEXT;
10503 if ((RAW == 'n') && (NXT(1) == 'o')) {
10504 standalone = 0;
10505 SKIP(2);
10506 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10507 (NXT(2) == 's')) {
10508 standalone = 1;
10509 SKIP(3);
10510 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010511 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010512 }
10513 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010514 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010515 } else
10516 NEXT;
10517 } else if (RAW == '"'){
10518 NEXT;
10519 if ((RAW == 'n') && (NXT(1) == 'o')) {
10520 standalone = 0;
10521 SKIP(2);
10522 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10523 (NXT(2) == 's')) {
10524 standalone = 1;
10525 SKIP(3);
10526 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010527 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010528 }
10529 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010530 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010531 } else
10532 NEXT;
10533 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010534 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010535 }
10536 }
10537 return(standalone);
10538}
10539
10540/**
10541 * xmlParseXMLDecl:
10542 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010543 *
Owen Taylor3473f882001-02-23 17:55:21 +000010544 * parse an XML declaration header
10545 *
10546 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10547 */
10548
10549void
10550xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10551 xmlChar *version;
10552
10553 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010554 * This value for standalone indicates that the document has an
10555 * XML declaration but it does not have a standalone attribute.
10556 * It will be overwritten later if a standalone attribute is found.
10557 */
10558 ctxt->input->standalone = -2;
10559
10560 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010561 * We know that '<?xml' is here.
10562 */
10563 SKIP(5);
10564
William M. Brack76e95df2003-10-18 16:20:14 +000010565 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010566 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10567 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010568 }
10569 SKIP_BLANKS;
10570
10571 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010572 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010573 */
10574 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010575 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010576 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010577 } else {
10578 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10579 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010580 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010581 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010582 if (ctxt->options & XML_PARSE_OLD10) {
10583 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10584 "Unsupported version '%s'\n",
10585 version);
10586 } else {
10587 if ((version[0] == '1') && ((version[1] == '.'))) {
10588 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10589 "Unsupported version '%s'\n",
10590 version, NULL);
10591 } else {
10592 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10593 "Unsupported version '%s'\n",
10594 version);
10595 }
10596 }
Daniel Veillard19840942001-11-29 16:11:38 +000010597 }
10598 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010599 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010600 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010601 }
Owen Taylor3473f882001-02-23 17:55:21 +000010602
10603 /*
10604 * We may have the encoding declaration
10605 */
William M. Brack76e95df2003-10-18 16:20:14 +000010606 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010607 if ((RAW == '?') && (NXT(1) == '>')) {
10608 SKIP(2);
10609 return;
10610 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010611 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010612 }
10613 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010614 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10615 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010616 /*
10617 * The XML REC instructs us to stop parsing right here
10618 */
10619 return;
10620 }
10621
10622 /*
10623 * We may have the standalone status.
10624 */
William M. Brack76e95df2003-10-18 16:20:14 +000010625 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010626 if ((RAW == '?') && (NXT(1) == '>')) {
10627 SKIP(2);
10628 return;
10629 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010630 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010631 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010632
10633 /*
10634 * We can grow the input buffer freely at that point
10635 */
10636 GROW;
10637
Owen Taylor3473f882001-02-23 17:55:21 +000010638 SKIP_BLANKS;
10639 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10640
10641 SKIP_BLANKS;
10642 if ((RAW == '?') && (NXT(1) == '>')) {
10643 SKIP(2);
10644 } else if (RAW == '>') {
10645 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010646 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010647 NEXT;
10648 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010649 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010650 MOVETO_ENDTAG(CUR_PTR);
10651 NEXT;
10652 }
10653}
10654
10655/**
10656 * xmlParseMisc:
10657 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010658 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010659 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010660 *
10661 * [27] Misc ::= Comment | PI | S
10662 */
10663
10664void
10665xmlParseMisc(xmlParserCtxtPtr ctxt) {
Elliott Hughesecdab2a2022-02-23 14:33:50 -080010666 while (ctxt->instate != XML_PARSER_EOF) {
10667 SKIP_BLANKS;
10668 GROW;
Daniel Veillard561b7f82002-03-20 21:55:57 +000010669 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010670 xmlParsePI(ctxt);
Elliott Hughesecdab2a2022-02-23 14:33:50 -080010671 } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010672 xmlParseComment(ctxt);
Elliott Hughesecdab2a2022-02-23 14:33:50 -080010673 } else {
10674 break;
10675 }
Owen Taylor3473f882001-02-23 17:55:21 +000010676 }
10677}
10678
10679/**
10680 * xmlParseDocument:
10681 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010682 *
Owen Taylor3473f882001-02-23 17:55:21 +000010683 * parse an XML document (and build a tree if using the standard SAX
10684 * interface).
10685 *
10686 * [1] document ::= prolog element Misc*
10687 *
10688 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10689 *
10690 * Returns 0, -1 in case of error. the parser context is augmented
10691 * as a result of the parsing.
10692 */
10693
10694int
10695xmlParseDocument(xmlParserCtxtPtr ctxt) {
10696 xmlChar start[4];
10697 xmlCharEncoding enc;
10698
10699 xmlInitParser();
10700
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010701 if ((ctxt == NULL) || (ctxt->input == NULL))
10702 return(-1);
10703
Owen Taylor3473f882001-02-23 17:55:21 +000010704 GROW;
10705
10706 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010707 * SAX: detecting the level.
10708 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010709 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010710
10711 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010712 * SAX: beginning of the document processing.
10713 */
10714 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10715 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010716 if (ctxt->instate == XML_PARSER_EOF)
10717 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010718
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010719 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010720 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010721 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010722 * Get the 4 first bytes and decode the charset
10723 * if enc != XML_CHAR_ENCODING_NONE
10724 * plug some encoding conversion routines.
10725 */
10726 start[0] = RAW;
10727 start[1] = NXT(1);
10728 start[2] = NXT(2);
10729 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010730 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010731 if (enc != XML_CHAR_ENCODING_NONE) {
10732 xmlSwitchEncoding(ctxt, enc);
10733 }
Owen Taylor3473f882001-02-23 17:55:21 +000010734 }
10735
10736
10737 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010738 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010739 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010740 }
10741
10742 /*
10743 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010744 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010745 * than just the first line, unless the amount of data is really
10746 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010747 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010748 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10749 GROW;
10750 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010751 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010752
10753 /*
10754 * Note that we will switch encoding on the fly.
10755 */
10756 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010757 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10758 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010759 /*
10760 * The XML REC instructs us to stop parsing right here
10761 */
10762 return(-1);
10763 }
10764 ctxt->standalone = ctxt->input->standalone;
10765 SKIP_BLANKS;
10766 } else {
10767 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10768 }
10769 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10770 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010771 if (ctxt->instate == XML_PARSER_EOF)
10772 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010773 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10774 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10775 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10776 }
Owen Taylor3473f882001-02-23 17:55:21 +000010777
10778 /*
10779 * The Misc part of the Prolog
10780 */
Owen Taylor3473f882001-02-23 17:55:21 +000010781 xmlParseMisc(ctxt);
10782
10783 /*
10784 * Then possibly doc type declaration(s) and more Misc
10785 * (doctypedecl Misc*)?
10786 */
10787 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010788 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010789
10790 ctxt->inSubset = 1;
10791 xmlParseDocTypeDecl(ctxt);
10792 if (RAW == '[') {
10793 ctxt->instate = XML_PARSER_DTD;
10794 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010795 if (ctxt->instate == XML_PARSER_EOF)
10796 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010797 }
10798
10799 /*
10800 * Create and update the external subset.
10801 */
10802 ctxt->inSubset = 2;
10803 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10804 (!ctxt->disableSAX))
10805 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10806 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010807 if (ctxt->instate == XML_PARSER_EOF)
10808 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010809 ctxt->inSubset = 0;
10810
Daniel Veillardac4118d2008-01-11 05:27:32 +000010811 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010812
10813 ctxt->instate = XML_PARSER_PROLOG;
10814 xmlParseMisc(ctxt);
10815 }
10816
10817 /*
10818 * Time to start parsing the tree itself
10819 */
10820 GROW;
10821 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010822 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10823 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010824 } else {
10825 ctxt->instate = XML_PARSER_CONTENT;
10826 xmlParseElement(ctxt);
10827 ctxt->instate = XML_PARSER_EPILOG;
10828
10829
10830 /*
10831 * The Misc part at the end
10832 */
10833 xmlParseMisc(ctxt);
10834
Daniel Veillard561b7f82002-03-20 21:55:57 +000010835 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010836 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010837 }
10838 ctxt->instate = XML_PARSER_EOF;
10839 }
10840
10841 /*
10842 * SAX: end of the document processing.
10843 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010844 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010845 ctxt->sax->endDocument(ctxt->userData);
10846
Daniel Veillard5997aca2002-03-18 18:36:20 +000010847 /*
10848 * Remove locally kept entity definitions if the tree was not built
10849 */
10850 if ((ctxt->myDoc != NULL) &&
10851 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10852 xmlFreeDoc(ctxt->myDoc);
10853 ctxt->myDoc = NULL;
10854 }
10855
Daniel Veillardae0765b2008-07-31 19:54:59 +000010856 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10857 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10858 if (ctxt->valid)
10859 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10860 if (ctxt->nsWellFormed)
10861 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10862 if (ctxt->options & XML_PARSE_OLD10)
10863 ctxt->myDoc->properties |= XML_DOC_OLD10;
10864 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010865 if (! ctxt->wellFormed) {
10866 ctxt->valid = 0;
10867 return(-1);
10868 }
Owen Taylor3473f882001-02-23 17:55:21 +000010869 return(0);
10870}
10871
10872/**
10873 * xmlParseExtParsedEnt:
10874 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010875 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010876 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010877 * An external general parsed entity is well-formed if it matches the
10878 * production labeled extParsedEnt.
10879 *
10880 * [78] extParsedEnt ::= TextDecl? content
10881 *
10882 * Returns 0, -1 in case of error. the parser context is augmented
10883 * as a result of the parsing.
10884 */
10885
10886int
10887xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10888 xmlChar start[4];
10889 xmlCharEncoding enc;
10890
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010891 if ((ctxt == NULL) || (ctxt->input == NULL))
10892 return(-1);
10893
Owen Taylor3473f882001-02-23 17:55:21 +000010894 xmlDefaultSAXHandlerInit();
10895
Daniel Veillard309f81d2003-09-23 09:02:53 +000010896 xmlDetectSAX2(ctxt);
10897
Owen Taylor3473f882001-02-23 17:55:21 +000010898 GROW;
10899
10900 /*
10901 * SAX: beginning of the document processing.
10902 */
10903 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10904 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10905
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010906 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010907 * Get the 4 first bytes and decode the charset
10908 * if enc != XML_CHAR_ENCODING_NONE
10909 * plug some encoding conversion routines.
10910 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010911 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10912 start[0] = RAW;
10913 start[1] = NXT(1);
10914 start[2] = NXT(2);
10915 start[3] = NXT(3);
10916 enc = xmlDetectCharEncoding(start, 4);
10917 if (enc != XML_CHAR_ENCODING_NONE) {
10918 xmlSwitchEncoding(ctxt, enc);
10919 }
Owen Taylor3473f882001-02-23 17:55:21 +000010920 }
10921
10922
10923 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010924 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010925 }
10926
10927 /*
10928 * Check for the XMLDecl in the Prolog.
10929 */
10930 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010931 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010932
10933 /*
10934 * Note that we will switch encoding on the fly.
10935 */
10936 xmlParseXMLDecl(ctxt);
10937 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10938 /*
10939 * The XML REC instructs us to stop parsing right here
10940 */
10941 return(-1);
10942 }
10943 SKIP_BLANKS;
10944 } else {
10945 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10946 }
10947 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10948 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010949 if (ctxt->instate == XML_PARSER_EOF)
10950 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010951
10952 /*
10953 * Doing validity checking on chunk doesn't make sense
10954 */
10955 ctxt->instate = XML_PARSER_CONTENT;
10956 ctxt->validate = 0;
10957 ctxt->loadsubset = 0;
10958 ctxt->depth = 0;
10959
10960 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010961 if (ctxt->instate == XML_PARSER_EOF)
10962 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010963
Owen Taylor3473f882001-02-23 17:55:21 +000010964 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010965 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010966 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010967 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010968 }
10969
10970 /*
10971 * SAX: end of the document processing.
10972 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010973 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010974 ctxt->sax->endDocument(ctxt->userData);
10975
10976 if (! ctxt->wellFormed) return(-1);
10977 return(0);
10978}
10979
Daniel Veillard73b013f2003-09-30 12:36:01 +000010980#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010981/************************************************************************
10982 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010983 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010984 * *
10985 ************************************************************************/
10986
10987/**
10988 * xmlParseLookupSequence:
10989 * @ctxt: an XML parser context
10990 * @first: the first char to lookup
10991 * @next: the next char to lookup or zero
10992 * @third: the next char to lookup or zero
10993 *
10994 * Try to find if a sequence (first, next, third) or just (first next) or
10995 * (first) is available in the input stream.
10996 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10997 * to avoid rescanning sequences of bytes, it DOES change the state of the
10998 * parser, do not use liberally.
10999 *
11000 * Returns the index to the current parsing point if the full sequence
11001 * is available, -1 otherwise.
11002 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011003static int
Owen Taylor3473f882001-02-23 17:55:21 +000011004xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11005 xmlChar next, xmlChar third) {
11006 int base, len;
11007 xmlParserInputPtr in;
11008 const xmlChar *buf;
11009
11010 in = ctxt->input;
11011 if (in == NULL) return(-1);
11012 base = in->cur - in->base;
11013 if (base < 0) return(-1);
11014 if (ctxt->checkIndex > base)
11015 base = ctxt->checkIndex;
11016 if (in->buf == NULL) {
11017 buf = in->base;
11018 len = in->length;
11019 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011020 buf = xmlBufContent(in->buf->buffer);
11021 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000011022 }
11023 /* take into account the sequence length */
11024 if (third) len -= 2;
11025 else if (next) len --;
11026 for (;base < len;base++) {
11027 if (buf[base] == first) {
11028 if (third != 0) {
11029 if ((buf[base + 1] != next) ||
11030 (buf[base + 2] != third)) continue;
11031 } else if (next != 0) {
11032 if (buf[base + 1] != next) continue;
11033 }
11034 ctxt->checkIndex = 0;
11035#ifdef DEBUG_PUSH
11036 if (next == 0)
11037 xmlGenericError(xmlGenericErrorContext,
11038 "PP: lookup '%c' found at %d\n",
11039 first, base);
11040 else if (third == 0)
11041 xmlGenericError(xmlGenericErrorContext,
11042 "PP: lookup '%c%c' found at %d\n",
11043 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011044 else
Owen Taylor3473f882001-02-23 17:55:21 +000011045 xmlGenericError(xmlGenericErrorContext,
11046 "PP: lookup '%c%c%c' found at %d\n",
11047 first, next, third, base);
11048#endif
11049 return(base - (in->cur - in->base));
11050 }
11051 }
11052 ctxt->checkIndex = base;
11053#ifdef DEBUG_PUSH
11054 if (next == 0)
11055 xmlGenericError(xmlGenericErrorContext,
11056 "PP: lookup '%c' failed\n", first);
11057 else if (third == 0)
11058 xmlGenericError(xmlGenericErrorContext,
11059 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011060 else
Owen Taylor3473f882001-02-23 17:55:21 +000011061 xmlGenericError(xmlGenericErrorContext,
11062 "PP: lookup '%c%c%c' failed\n", first, next, third);
11063#endif
11064 return(-1);
11065}
11066
11067/**
Daniel Veillarda880b122003-04-21 21:36:41 +000011068 * xmlParseGetLasts:
11069 * @ctxt: an XML parser context
11070 * @lastlt: pointer to store the last '<' from the input
11071 * @lastgt: pointer to store the last '>' from the input
11072 *
11073 * Lookup the last < and > in the current chunk
11074 */
11075static void
11076xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11077 const xmlChar **lastgt) {
11078 const xmlChar *tmp;
11079
11080 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11081 xmlGenericError(xmlGenericErrorContext,
11082 "Internal error: xmlParseGetLasts\n");
11083 return;
11084 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011085 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011086 tmp = ctxt->input->end;
11087 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000011088 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000011089 if (tmp < ctxt->input->base) {
11090 *lastlt = NULL;
11091 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011092 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011093 *lastlt = tmp;
11094 tmp++;
11095 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11096 if (*tmp == '\'') {
11097 tmp++;
11098 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11099 if (tmp < ctxt->input->end) tmp++;
11100 } else if (*tmp == '"') {
11101 tmp++;
11102 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11103 if (tmp < ctxt->input->end) tmp++;
11104 } else
11105 tmp++;
11106 }
11107 if (tmp < ctxt->input->end)
11108 *lastgt = tmp;
11109 else {
11110 tmp = *lastlt;
11111 tmp--;
11112 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11113 if (tmp >= ctxt->input->base)
11114 *lastgt = tmp;
11115 else
11116 *lastgt = NULL;
11117 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011118 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011119 } else {
11120 *lastlt = NULL;
11121 *lastgt = NULL;
11122 }
11123}
11124/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011125 * xmlCheckCdataPush:
David Kilzer4f8606c2016-01-05 13:38:09 -080011126 * @cur: pointer to the block of characters
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011127 * @len: length of the block in bytes
David Kilzer4f8606c2016-01-05 13:38:09 -080011128 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011129 *
11130 * Check that the block of characters is okay as SCdata content [20]
11131 *
11132 * Returns the number of bytes to pass if okay, a negative index where an
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +020011133 * UTF-8 error occurred otherwise
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011134 */
11135static int
David Kilzer4f8606c2016-01-05 13:38:09 -080011136xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011137 int ix;
11138 unsigned char c;
11139 int codepoint;
11140
11141 if ((utf == NULL) || (len <= 0))
11142 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011143
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011144 for (ix = 0; ix < len;) { /* string is 0-terminated */
11145 c = utf[ix];
11146 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11147 if (c >= 0x20)
11148 ix++;
11149 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11150 ix++;
11151 else
11152 return(-ix);
11153 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011154 if (ix + 2 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011155 if ((utf[ix+1] & 0xc0 ) != 0x80)
11156 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011157 codepoint = (utf[ix] & 0x1f) << 6;
11158 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011159 if (!xmlIsCharQ(codepoint))
11160 return(-ix);
11161 ix += 2;
11162 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011163 if (ix + 3 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011164 if (((utf[ix+1] & 0xc0) != 0x80) ||
11165 ((utf[ix+2] & 0xc0) != 0x80))
11166 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011167 codepoint = (utf[ix] & 0xf) << 12;
11168 codepoint |= (utf[ix+1] & 0x3f) << 6;
11169 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011170 if (!xmlIsCharQ(codepoint))
11171 return(-ix);
11172 ix += 3;
11173 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011174 if (ix + 4 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011175 if (((utf[ix+1] & 0xc0) != 0x80) ||
11176 ((utf[ix+2] & 0xc0) != 0x80) ||
11177 ((utf[ix+3] & 0xc0) != 0x80))
11178 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011179 codepoint = (utf[ix] & 0x7) << 18;
11180 codepoint |= (utf[ix+1] & 0x3f) << 12;
11181 codepoint |= (utf[ix+2] & 0x3f) << 6;
11182 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011183 if (!xmlIsCharQ(codepoint))
11184 return(-ix);
11185 ix += 4;
11186 } else /* unknown encoding */
11187 return(-ix);
11188 }
11189 return(ix);
11190}
11191
11192/**
Owen Taylor3473f882001-02-23 17:55:21 +000011193 * xmlParseTryOrFinish:
11194 * @ctxt: an XML parser context
11195 * @terminate: last chunk indicator
11196 *
11197 * Try to progress on parsing
11198 *
11199 * Returns zero if no parsing was possible
11200 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011201static int
Owen Taylor3473f882001-02-23 17:55:21 +000011202xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11203 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011204 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011205 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011206 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011207
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011208 if (ctxt->input == NULL)
11209 return(0);
11210
Owen Taylor3473f882001-02-23 17:55:21 +000011211#ifdef DEBUG_PUSH
11212 switch (ctxt->instate) {
11213 case XML_PARSER_EOF:
11214 xmlGenericError(xmlGenericErrorContext,
11215 "PP: try EOF\n"); break;
11216 case XML_PARSER_START:
11217 xmlGenericError(xmlGenericErrorContext,
11218 "PP: try START\n"); break;
11219 case XML_PARSER_MISC:
11220 xmlGenericError(xmlGenericErrorContext,
11221 "PP: try MISC\n");break;
11222 case XML_PARSER_COMMENT:
11223 xmlGenericError(xmlGenericErrorContext,
11224 "PP: try COMMENT\n");break;
11225 case XML_PARSER_PROLOG:
11226 xmlGenericError(xmlGenericErrorContext,
11227 "PP: try PROLOG\n");break;
11228 case XML_PARSER_START_TAG:
11229 xmlGenericError(xmlGenericErrorContext,
11230 "PP: try START_TAG\n");break;
11231 case XML_PARSER_CONTENT:
11232 xmlGenericError(xmlGenericErrorContext,
11233 "PP: try CONTENT\n");break;
11234 case XML_PARSER_CDATA_SECTION:
11235 xmlGenericError(xmlGenericErrorContext,
11236 "PP: try CDATA_SECTION\n");break;
11237 case XML_PARSER_END_TAG:
11238 xmlGenericError(xmlGenericErrorContext,
11239 "PP: try END_TAG\n");break;
11240 case XML_PARSER_ENTITY_DECL:
11241 xmlGenericError(xmlGenericErrorContext,
11242 "PP: try ENTITY_DECL\n");break;
11243 case XML_PARSER_ENTITY_VALUE:
11244 xmlGenericError(xmlGenericErrorContext,
11245 "PP: try ENTITY_VALUE\n");break;
11246 case XML_PARSER_ATTRIBUTE_VALUE:
11247 xmlGenericError(xmlGenericErrorContext,
11248 "PP: try ATTRIBUTE_VALUE\n");break;
11249 case XML_PARSER_DTD:
11250 xmlGenericError(xmlGenericErrorContext,
11251 "PP: try DTD\n");break;
11252 case XML_PARSER_EPILOG:
11253 xmlGenericError(xmlGenericErrorContext,
11254 "PP: try EPILOG\n");break;
11255 case XML_PARSER_PI:
11256 xmlGenericError(xmlGenericErrorContext,
11257 "PP: try PI\n");break;
11258 case XML_PARSER_IGNORE:
11259 xmlGenericError(xmlGenericErrorContext,
11260 "PP: try IGNORE\n");break;
11261 }
11262#endif
11263
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011264 if ((ctxt->input != NULL) &&
11265 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011266 xmlSHRINK(ctxt);
11267 ctxt->checkIndex = 0;
11268 }
11269 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011270
Daniel Veillarde50ba812013-04-11 15:54:51 +080011271 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011272 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011273 return(0);
11274
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011275 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011276 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011277 avail = ctxt->input->length -
11278 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011279 else {
11280 /*
11281 * If we are operating on converted input, try to flush
Haibo Huangcfd91dc2020-07-30 23:01:33 -070011282 * remaining chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011283 * buffer. But do not do this in document start where
11284 * encoding="..." may not have been read and we work on a
11285 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011286 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011287 if ((ctxt->instate != XML_PARSER_START) &&
11288 (ctxt->input->buf->raw != NULL) &&
11289 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011290 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11291 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011292 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011293
11294 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011295 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11296 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011297 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011298 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011299 (ctxt->input->cur - ctxt->input->base);
11300 }
Owen Taylor3473f882001-02-23 17:55:21 +000011301 if (avail < 1)
11302 goto done;
11303 switch (ctxt->instate) {
11304 case XML_PARSER_EOF:
11305 /*
11306 * Document parsing is done !
11307 */
11308 goto done;
11309 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011310 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11311 xmlChar start[4];
11312 xmlCharEncoding enc;
11313
11314 /*
11315 * Very first chars read from the document flow.
11316 */
11317 if (avail < 4)
11318 goto done;
11319
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011320 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011321 * Get the 4 first bytes and decode the charset
11322 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011323 * plug some encoding conversion routines,
11324 * else xmlSwitchEncoding will set to (default)
11325 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011326 */
11327 start[0] = RAW;
11328 start[1] = NXT(1);
11329 start[2] = NXT(2);
11330 start[3] = NXT(3);
11331 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011332 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011333 break;
11334 }
Owen Taylor3473f882001-02-23 17:55:21 +000011335
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011336 if (avail < 2)
11337 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011338 cur = ctxt->input->cur[0];
11339 next = ctxt->input->cur[1];
11340 if (cur == 0) {
11341 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11342 ctxt->sax->setDocumentLocator(ctxt->userData,
11343 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011344 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011345 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011346#ifdef DEBUG_PUSH
11347 xmlGenericError(xmlGenericErrorContext,
11348 "PP: entering EOF\n");
11349#endif
11350 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11351 ctxt->sax->endDocument(ctxt->userData);
11352 goto done;
11353 }
11354 if ((cur == '<') && (next == '?')) {
11355 /* PI or XML decl */
11356 if (avail < 5) return(ret);
11357 if ((!terminate) &&
11358 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11359 return(ret);
11360 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11361 ctxt->sax->setDocumentLocator(ctxt->userData,
11362 &xmlDefaultSAXLocator);
11363 if ((ctxt->input->cur[2] == 'x') &&
11364 (ctxt->input->cur[3] == 'm') &&
11365 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011366 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011367 ret += 5;
11368#ifdef DEBUG_PUSH
11369 xmlGenericError(xmlGenericErrorContext,
11370 "PP: Parsing XML Decl\n");
11371#endif
11372 xmlParseXMLDecl(ctxt);
11373 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11374 /*
11375 * The XML REC instructs us to stop parsing right
11376 * here
11377 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011378 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011379 return(0);
11380 }
11381 ctxt->standalone = ctxt->input->standalone;
11382 if ((ctxt->encoding == NULL) &&
11383 (ctxt->input->encoding != NULL))
11384 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11385 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11386 (!ctxt->disableSAX))
11387 ctxt->sax->startDocument(ctxt->userData);
11388 ctxt->instate = XML_PARSER_MISC;
11389#ifdef DEBUG_PUSH
11390 xmlGenericError(xmlGenericErrorContext,
11391 "PP: entering MISC\n");
11392#endif
11393 } else {
11394 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11395 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11396 (!ctxt->disableSAX))
11397 ctxt->sax->startDocument(ctxt->userData);
11398 ctxt->instate = XML_PARSER_MISC;
11399#ifdef DEBUG_PUSH
11400 xmlGenericError(xmlGenericErrorContext,
11401 "PP: entering MISC\n");
11402#endif
11403 }
11404 } else {
11405 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11406 ctxt->sax->setDocumentLocator(ctxt->userData,
11407 &xmlDefaultSAXLocator);
11408 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011409 if (ctxt->version == NULL) {
11410 xmlErrMemory(ctxt, NULL);
11411 break;
11412 }
Owen Taylor3473f882001-02-23 17:55:21 +000011413 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11414 (!ctxt->disableSAX))
11415 ctxt->sax->startDocument(ctxt->userData);
11416 ctxt->instate = XML_PARSER_MISC;
11417#ifdef DEBUG_PUSH
11418 xmlGenericError(xmlGenericErrorContext,
11419 "PP: entering MISC\n");
11420#endif
11421 }
11422 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011423 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011424 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011425 const xmlChar *prefix = NULL;
11426 const xmlChar *URI = NULL;
Elliott Hughese54f00d2021-05-13 08:13:46 -070011427 int line = ctxt->input->line;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011428 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011429
11430 if ((avail < 2) && (ctxt->inputNr == 1))
11431 goto done;
11432 cur = ctxt->input->cur[0];
11433 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011434 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011435 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011436 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11437 ctxt->sax->endDocument(ctxt->userData);
11438 goto done;
11439 }
11440 if (!terminate) {
11441 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011442 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011443 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011444 goto done;
11445 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11446 goto done;
11447 }
11448 }
11449 if (ctxt->spaceNr == 0)
11450 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011451 else if (*ctxt->space == -2)
11452 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011453 else
11454 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011455#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011456 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011457#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011458 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011459#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011460 else
11461 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011462#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011463 if (ctxt->instate == XML_PARSER_EOF)
11464 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011465 if (name == NULL) {
11466 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011467 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011468 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11469 ctxt->sax->endDocument(ctxt->userData);
11470 goto done;
11471 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011472#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011473 /*
11474 * [ VC: Root Element Type ]
11475 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011476 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011477 */
11478 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11479 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11480 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011481#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011482
11483 /*
11484 * Check for an Empty Element.
11485 */
11486 if ((RAW == '/') && (NXT(1) == '>')) {
11487 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011488
11489 if (ctxt->sax2) {
11490 if ((ctxt->sax != NULL) &&
11491 (ctxt->sax->endElementNs != NULL) &&
11492 (!ctxt->disableSAX))
11493 ctxt->sax->endElementNs(ctxt->userData, name,
11494 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011495 if (ctxt->nsNr - nsNr > 0)
11496 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011497#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011498 } else {
11499 if ((ctxt->sax != NULL) &&
11500 (ctxt->sax->endElement != NULL) &&
11501 (!ctxt->disableSAX))
11502 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011503#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011504 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011505 if (ctxt->instate == XML_PARSER_EOF)
11506 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011507 spacePop(ctxt);
11508 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011509 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011510 } else {
11511 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011512 }
Daniel Veillard65686452012-07-19 18:25:01 +080011513 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011514 break;
11515 }
11516 if (RAW == '>') {
11517 NEXT;
11518 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011519 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011520 "Couldn't find end of Start Tag %s\n",
11521 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011522 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011523 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011524 }
Elliott Hughese54f00d2021-05-13 08:13:46 -070011525 nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011526
Daniel Veillarda880b122003-04-21 21:36:41 +000011527 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011528 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011529 break;
11530 }
11531 case XML_PARSER_CONTENT: {
11532 const xmlChar *test;
11533 unsigned int cons;
11534 if ((avail < 2) && (ctxt->inputNr == 1))
11535 goto done;
11536 cur = ctxt->input->cur[0];
11537 next = ctxt->input->cur[1];
11538
11539 test = CUR_PTR;
11540 cons = ctxt->input->consumed;
11541 if ((cur == '<') && (next == '/')) {
11542 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011543 break;
11544 } else if ((cur == '<') && (next == '?')) {
11545 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011546 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11547 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011548 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011549 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011550 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011551 ctxt->instate = XML_PARSER_CONTENT;
11552 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011553 } else if ((cur == '<') && (next != '!')) {
11554 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011555 break;
11556 } else if ((cur == '<') && (next == '!') &&
11557 (ctxt->input->cur[2] == '-') &&
11558 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011559 int term;
11560
11561 if (avail < 4)
11562 goto done;
11563 ctxt->input->cur += 4;
11564 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11565 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011566 if ((!terminate) && (term < 0)) {
11567 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011568 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011569 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011570 xmlParseComment(ctxt);
11571 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011572 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011573 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11574 (ctxt->input->cur[2] == '[') &&
11575 (ctxt->input->cur[3] == 'C') &&
11576 (ctxt->input->cur[4] == 'D') &&
11577 (ctxt->input->cur[5] == 'A') &&
11578 (ctxt->input->cur[6] == 'T') &&
11579 (ctxt->input->cur[7] == 'A') &&
11580 (ctxt->input->cur[8] == '[')) {
11581 SKIP(9);
11582 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011583 break;
11584 } else if ((cur == '<') && (next == '!') &&
11585 (avail < 9)) {
11586 goto done;
11587 } else if (cur == '&') {
11588 if ((!terminate) &&
11589 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11590 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011591 xmlParseReference(ctxt);
11592 } else {
11593 /* TODO Avoid the extra copy, handle directly !!! */
11594 /*
11595 * Goal of the following test is:
11596 * - minimize calls to the SAX 'character' callback
11597 * when they are mergeable
11598 * - handle a problem for isBlank when we only parse
11599 * a sequence of blank chars and the next one is
11600 * not available to check against '<' presence.
11601 * - tries to homogenize the differences in SAX
11602 * callbacks between the push and pull versions
11603 * of the parser.
11604 */
11605 if ((ctxt->inputNr == 1) &&
11606 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11607 if (!terminate) {
11608 if (ctxt->progressive) {
11609 if ((lastlt == NULL) ||
11610 (ctxt->input->cur > lastlt))
11611 goto done;
11612 } else if (xmlParseLookupSequence(ctxt,
11613 '<', 0, 0) < 0) {
11614 goto done;
11615 }
11616 }
11617 }
11618 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011619 xmlParseCharData(ctxt, 0);
11620 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011621 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011622 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11623 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011624 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011625 break;
11626 }
11627 break;
11628 }
11629 case XML_PARSER_END_TAG:
11630 if (avail < 2)
11631 goto done;
11632 if (!terminate) {
11633 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011634 /* > can be found unescaped in attribute values */
11635 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011636 goto done;
11637 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11638 goto done;
11639 }
11640 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011641 if (ctxt->sax2) {
Elliott Hughese54f00d2021-05-13 08:13:46 -070011642 xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011643 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011644 }
11645#ifdef LIBXML_SAX1_ENABLED
11646 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011647 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011648#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011649 if (ctxt->instate == XML_PARSER_EOF) {
11650 /* Nothing */
11651 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011652 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011653 } else {
11654 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011655 }
11656 break;
11657 case XML_PARSER_CDATA_SECTION: {
11658 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011659 * The Push mode needs the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011660 * cdataBlock to merge back contiguous callbacks.
11661 */
11662 int base;
11663
11664 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11665 if (base < 0) {
11666 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011667 int tmp;
11668
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011669 tmp = xmlCheckCdataPush(ctxt->input->cur,
David Kilzer4f8606c2016-01-05 13:38:09 -080011670 XML_PARSER_BIG_BUFFER_SIZE, 0);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011671 if (tmp < 0) {
11672 tmp = -tmp;
11673 ctxt->input->cur += tmp;
11674 goto encoding_error;
11675 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011676 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11677 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011678 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011679 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011680 else if (ctxt->sax->characters != NULL)
11681 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011682 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011683 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011684 if (ctxt->instate == XML_PARSER_EOF)
11685 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011686 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011687 ctxt->checkIndex = 0;
11688 }
11689 goto done;
11690 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011691 int tmp;
11692
David Kilzer4f8606c2016-01-05 13:38:09 -080011693 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011694 if ((tmp < 0) || (tmp != base)) {
11695 tmp = -tmp;
11696 ctxt->input->cur += tmp;
11697 goto encoding_error;
11698 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011699 if ((ctxt->sax != NULL) && (base == 0) &&
11700 (ctxt->sax->cdataBlock != NULL) &&
11701 (!ctxt->disableSAX)) {
11702 /*
11703 * Special case to provide identical behaviour
11704 * between pull and push parsers on empty CDATA
11705 * sections
11706 */
11707 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11708 (!strncmp((const char *)&ctxt->input->cur[-9],
11709 "<![CDATA[", 9)))
11710 ctxt->sax->cdataBlock(ctxt->userData,
11711 BAD_CAST "", 0);
11712 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011713 (!ctxt->disableSAX)) {
11714 if (ctxt->sax->cdataBlock != NULL)
11715 ctxt->sax->cdataBlock(ctxt->userData,
11716 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011717 else if (ctxt->sax->characters != NULL)
11718 ctxt->sax->characters(ctxt->userData,
11719 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011720 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011721 if (ctxt->instate == XML_PARSER_EOF)
11722 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011723 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011724 ctxt->checkIndex = 0;
11725 ctxt->instate = XML_PARSER_CONTENT;
11726#ifdef DEBUG_PUSH
11727 xmlGenericError(xmlGenericErrorContext,
11728 "PP: entering CONTENT\n");
11729#endif
11730 }
11731 break;
11732 }
Owen Taylor3473f882001-02-23 17:55:21 +000011733 case XML_PARSER_MISC:
11734 SKIP_BLANKS;
11735 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011736 avail = ctxt->input->length -
11737 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011738 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011739 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011740 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011741 if (avail < 2)
11742 goto done;
11743 cur = ctxt->input->cur[0];
11744 next = ctxt->input->cur[1];
11745 if ((cur == '<') && (next == '?')) {
11746 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011747 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11748 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011749 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011750 }
Owen Taylor3473f882001-02-23 17:55:21 +000011751#ifdef DEBUG_PUSH
11752 xmlGenericError(xmlGenericErrorContext,
11753 "PP: Parsing PI\n");
11754#endif
11755 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011756 if (ctxt->instate == XML_PARSER_EOF)
11757 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011758 ctxt->instate = XML_PARSER_MISC;
11759 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011760 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011761 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011762 (ctxt->input->cur[2] == '-') &&
11763 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011764 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011765 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11766 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011767 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011768 }
Owen Taylor3473f882001-02-23 17:55:21 +000011769#ifdef DEBUG_PUSH
11770 xmlGenericError(xmlGenericErrorContext,
11771 "PP: Parsing Comment\n");
11772#endif
11773 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011774 if (ctxt->instate == XML_PARSER_EOF)
11775 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011776 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011777 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011778 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011779 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011780 (ctxt->input->cur[2] == 'D') &&
11781 (ctxt->input->cur[3] == 'O') &&
11782 (ctxt->input->cur[4] == 'C') &&
11783 (ctxt->input->cur[5] == 'T') &&
11784 (ctxt->input->cur[6] == 'Y') &&
11785 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011786 (ctxt->input->cur[8] == 'E')) {
11787 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011788 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11789 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011790 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011791 }
Owen Taylor3473f882001-02-23 17:55:21 +000011792#ifdef DEBUG_PUSH
11793 xmlGenericError(xmlGenericErrorContext,
11794 "PP: Parsing internal subset\n");
11795#endif
11796 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011797 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011798 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011799 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011800 if (ctxt->instate == XML_PARSER_EOF)
11801 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011802 if (RAW == '[') {
11803 ctxt->instate = XML_PARSER_DTD;
11804#ifdef DEBUG_PUSH
11805 xmlGenericError(xmlGenericErrorContext,
11806 "PP: entering DTD\n");
11807#endif
11808 } else {
11809 /*
11810 * Create and update the external subset.
11811 */
11812 ctxt->inSubset = 2;
11813 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11814 (ctxt->sax->externalSubset != NULL))
11815 ctxt->sax->externalSubset(ctxt->userData,
11816 ctxt->intSubName, ctxt->extSubSystem,
11817 ctxt->extSubURI);
11818 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011819 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011820 ctxt->instate = XML_PARSER_PROLOG;
11821#ifdef DEBUG_PUSH
11822 xmlGenericError(xmlGenericErrorContext,
11823 "PP: entering PROLOG\n");
11824#endif
11825 }
11826 } else if ((cur == '<') && (next == '!') &&
11827 (avail < 9)) {
11828 goto done;
11829 } else {
11830 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011831 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011832 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011833#ifdef DEBUG_PUSH
11834 xmlGenericError(xmlGenericErrorContext,
11835 "PP: entering START_TAG\n");
11836#endif
11837 }
11838 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011839 case XML_PARSER_PROLOG:
11840 SKIP_BLANKS;
11841 if (ctxt->input->buf == NULL)
11842 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11843 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011844 avail = xmlBufUse(ctxt->input->buf->buffer) -
11845 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011846 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011847 goto done;
11848 cur = ctxt->input->cur[0];
11849 next = ctxt->input->cur[1];
11850 if ((cur == '<') && (next == '?')) {
11851 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011852 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11853 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011854 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011855 }
Owen Taylor3473f882001-02-23 17:55:21 +000011856#ifdef DEBUG_PUSH
11857 xmlGenericError(xmlGenericErrorContext,
11858 "PP: Parsing PI\n");
11859#endif
11860 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011861 if (ctxt->instate == XML_PARSER_EOF)
11862 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011863 ctxt->instate = XML_PARSER_PROLOG;
11864 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011865 } else if ((cur == '<') && (next == '!') &&
11866 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11867 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011868 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11869 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011870 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011871 }
Owen Taylor3473f882001-02-23 17:55:21 +000011872#ifdef DEBUG_PUSH
11873 xmlGenericError(xmlGenericErrorContext,
11874 "PP: Parsing Comment\n");
11875#endif
11876 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011877 if (ctxt->instate == XML_PARSER_EOF)
11878 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011879 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011880 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011881 } else if ((cur == '<') && (next == '!') &&
11882 (avail < 4)) {
11883 goto done;
11884 } else {
11885 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011886 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011887 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011888 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011889#ifdef DEBUG_PUSH
11890 xmlGenericError(xmlGenericErrorContext,
11891 "PP: entering START_TAG\n");
11892#endif
11893 }
11894 break;
11895 case XML_PARSER_EPILOG:
11896 SKIP_BLANKS;
11897 if (ctxt->input->buf == NULL)
11898 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11899 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011900 avail = xmlBufUse(ctxt->input->buf->buffer) -
11901 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011902 if (avail < 2)
11903 goto done;
11904 cur = ctxt->input->cur[0];
11905 next = ctxt->input->cur[1];
11906 if ((cur == '<') && (next == '?')) {
11907 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011908 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11909 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011910 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011911 }
Owen Taylor3473f882001-02-23 17:55:21 +000011912#ifdef DEBUG_PUSH
11913 xmlGenericError(xmlGenericErrorContext,
11914 "PP: Parsing PI\n");
11915#endif
11916 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011917 if (ctxt->instate == XML_PARSER_EOF)
11918 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011919 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011920 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011921 } else if ((cur == '<') && (next == '!') &&
11922 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11923 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011924 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11925 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011926 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011927 }
Owen Taylor3473f882001-02-23 17:55:21 +000011928#ifdef DEBUG_PUSH
11929 xmlGenericError(xmlGenericErrorContext,
11930 "PP: Parsing Comment\n");
11931#endif
11932 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011933 if (ctxt->instate == XML_PARSER_EOF)
11934 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011935 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011936 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011937 } else if ((cur == '<') && (next == '!') &&
11938 (avail < 4)) {
11939 goto done;
11940 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011941 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011942 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011943#ifdef DEBUG_PUSH
11944 xmlGenericError(xmlGenericErrorContext,
11945 "PP: entering EOF\n");
11946#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011947 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011948 ctxt->sax->endDocument(ctxt->userData);
11949 goto done;
11950 }
11951 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011952 case XML_PARSER_DTD: {
11953 /*
11954 * Sorry but progressive parsing of the internal subset
11955 * is not expected to be supported. We first check that
11956 * the full content of the internal subset is available and
11957 * the parsing is launched only at that point.
11958 * Internal subset ends up with "']' S? '>'" in an unescaped
11959 * section and not in a ']]>' sequence which are conditional
11960 * sections (whoever argued to keep that crap in XML deserve
11961 * a place in hell !).
11962 */
11963 int base, i;
11964 xmlChar *buf;
11965 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011966 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011967
11968 base = ctxt->input->cur - ctxt->input->base;
11969 if (base < 0) return(0);
11970 if (ctxt->checkIndex > base)
11971 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011972 buf = xmlBufContent(ctxt->input->buf->buffer);
11973 use = xmlBufUse(ctxt->input->buf->buffer);
11974 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011975 if (quote != 0) {
11976 if (buf[base] == quote)
11977 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011978 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011979 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011980 if ((quote == 0) && (buf[base] == '<')) {
11981 int found = 0;
11982 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011983 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011984 (buf[base + 1] == '!') &&
11985 (buf[base + 2] == '-') &&
11986 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011987 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011988 if ((buf[base] == '-') &&
11989 (buf[base + 1] == '-') &&
11990 (buf[base + 2] == '>')) {
11991 found = 1;
11992 base += 2;
11993 break;
11994 }
11995 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011996 if (!found) {
11997#if 0
11998 fprintf(stderr, "unfinished comment\n");
11999#endif
12000 break; /* for */
12001 }
Daniel Veillard036143b2004-02-12 11:57:52 +000012002 continue;
12003 }
12004 }
Owen Taylor3473f882001-02-23 17:55:21 +000012005 if (buf[base] == '"') {
12006 quote = '"';
12007 continue;
12008 }
12009 if (buf[base] == '\'') {
12010 quote = '\'';
12011 continue;
12012 }
12013 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012014#if 0
12015 fprintf(stderr, "%c%c%c%c: ", buf[base],
12016 buf[base + 1], buf[base + 2], buf[base + 3]);
12017#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012018 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000012019 break;
12020 if (buf[base + 1] == ']') {
12021 /* conditional crap, skip both ']' ! */
12022 base++;
12023 continue;
12024 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012025 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012026 if (buf[base + i] == '>') {
12027#if 0
12028 fprintf(stderr, "found\n");
12029#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012030 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012031 }
12032 if (!IS_BLANK_CH(buf[base + i])) {
12033#if 0
12034 fprintf(stderr, "not found\n");
12035#endif
12036 goto not_end_of_int_subset;
12037 }
Owen Taylor3473f882001-02-23 17:55:21 +000012038 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012039#if 0
12040 fprintf(stderr, "end of stream\n");
12041#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012042 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012043
Owen Taylor3473f882001-02-23 17:55:21 +000012044 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012045not_end_of_int_subset:
12046 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000012047 }
12048 /*
12049 * We didn't found the end of the Internal subset
12050 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012051 if (quote == 0)
12052 ctxt->checkIndex = base;
12053 else
12054 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012055#ifdef DEBUG_PUSH
12056 if (next == 0)
12057 xmlGenericError(xmlGenericErrorContext,
12058 "PP: lookup of int subset end filed\n");
12059#endif
12060 goto done;
12061
12062found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012063 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012064 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012065 if (ctxt->instate == XML_PARSER_EOF)
12066 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012067 ctxt->inSubset = 2;
12068 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12069 (ctxt->sax->externalSubset != NULL))
12070 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12071 ctxt->extSubSystem, ctxt->extSubURI);
12072 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000012073 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012074 if (ctxt->instate == XML_PARSER_EOF)
12075 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012076 ctxt->instate = XML_PARSER_PROLOG;
12077 ctxt->checkIndex = 0;
12078#ifdef DEBUG_PUSH
12079 xmlGenericError(xmlGenericErrorContext,
12080 "PP: entering PROLOG\n");
12081#endif
12082 break;
12083 }
12084 case XML_PARSER_COMMENT:
12085 xmlGenericError(xmlGenericErrorContext,
12086 "PP: internal error, state == COMMENT\n");
12087 ctxt->instate = XML_PARSER_CONTENT;
12088#ifdef DEBUG_PUSH
12089 xmlGenericError(xmlGenericErrorContext,
12090 "PP: entering CONTENT\n");
12091#endif
12092 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012093 case XML_PARSER_IGNORE:
12094 xmlGenericError(xmlGenericErrorContext,
12095 "PP: internal error, state == IGNORE");
12096 ctxt->instate = XML_PARSER_DTD;
12097#ifdef DEBUG_PUSH
12098 xmlGenericError(xmlGenericErrorContext,
12099 "PP: entering DTD\n");
12100#endif
12101 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012102 case XML_PARSER_PI:
12103 xmlGenericError(xmlGenericErrorContext,
12104 "PP: internal error, state == PI\n");
12105 ctxt->instate = XML_PARSER_CONTENT;
12106#ifdef DEBUG_PUSH
12107 xmlGenericError(xmlGenericErrorContext,
12108 "PP: entering CONTENT\n");
12109#endif
12110 break;
12111 case XML_PARSER_ENTITY_DECL:
12112 xmlGenericError(xmlGenericErrorContext,
12113 "PP: internal error, state == ENTITY_DECL\n");
12114 ctxt->instate = XML_PARSER_DTD;
12115#ifdef DEBUG_PUSH
12116 xmlGenericError(xmlGenericErrorContext,
12117 "PP: entering DTD\n");
12118#endif
12119 break;
12120 case XML_PARSER_ENTITY_VALUE:
12121 xmlGenericError(xmlGenericErrorContext,
12122 "PP: internal error, state == ENTITY_VALUE\n");
12123 ctxt->instate = XML_PARSER_CONTENT;
12124#ifdef DEBUG_PUSH
12125 xmlGenericError(xmlGenericErrorContext,
12126 "PP: entering DTD\n");
12127#endif
12128 break;
12129 case XML_PARSER_ATTRIBUTE_VALUE:
12130 xmlGenericError(xmlGenericErrorContext,
12131 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12132 ctxt->instate = XML_PARSER_START_TAG;
12133#ifdef DEBUG_PUSH
12134 xmlGenericError(xmlGenericErrorContext,
12135 "PP: entering START_TAG\n");
12136#endif
12137 break;
12138 case XML_PARSER_SYSTEM_LITERAL:
12139 xmlGenericError(xmlGenericErrorContext,
12140 "PP: internal error, state == SYSTEM_LITERAL\n");
12141 ctxt->instate = XML_PARSER_START_TAG;
12142#ifdef DEBUG_PUSH
12143 xmlGenericError(xmlGenericErrorContext,
12144 "PP: entering START_TAG\n");
12145#endif
12146 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012147 case XML_PARSER_PUBLIC_LITERAL:
12148 xmlGenericError(xmlGenericErrorContext,
12149 "PP: internal error, state == PUBLIC_LITERAL\n");
12150 ctxt->instate = XML_PARSER_START_TAG;
12151#ifdef DEBUG_PUSH
12152 xmlGenericError(xmlGenericErrorContext,
12153 "PP: entering START_TAG\n");
12154#endif
12155 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012156 }
12157 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012158done:
Owen Taylor3473f882001-02-23 17:55:21 +000012159#ifdef DEBUG_PUSH
12160 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12161#endif
12162 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012163encoding_error:
12164 {
12165 char buffer[150];
12166
12167 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12168 ctxt->input->cur[0], ctxt->input->cur[1],
12169 ctxt->input->cur[2], ctxt->input->cur[3]);
12170 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12171 "Input is not proper UTF-8, indicate encoding !\n%s",
12172 BAD_CAST buffer, NULL);
12173 }
12174 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012175}
12176
12177/**
Daniel Veillard65686452012-07-19 18:25:01 +080012178 * xmlParseCheckTransition:
12179 * @ctxt: an XML parser context
12180 * @chunk: a char array
12181 * @size: the size in byte of the chunk
12182 *
12183 * Check depending on the current parser state if the chunk given must be
12184 * processed immediately or one need more data to advance on parsing.
12185 *
12186 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12187 */
12188static int
12189xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12190 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12191 return(-1);
12192 if (ctxt->instate == XML_PARSER_START_TAG) {
12193 if (memchr(chunk, '>', size) != NULL)
12194 return(1);
12195 return(0);
12196 }
12197 if (ctxt->progressive == XML_PARSER_COMMENT) {
12198 if (memchr(chunk, '>', size) != NULL)
12199 return(1);
12200 return(0);
12201 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012202 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12203 if (memchr(chunk, '>', size) != NULL)
12204 return(1);
12205 return(0);
12206 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012207 if (ctxt->progressive == XML_PARSER_PI) {
12208 if (memchr(chunk, '>', size) != NULL)
12209 return(1);
12210 return(0);
12211 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012212 if (ctxt->instate == XML_PARSER_END_TAG) {
12213 if (memchr(chunk, '>', size) != NULL)
12214 return(1);
12215 return(0);
12216 }
12217 if ((ctxt->progressive == XML_PARSER_DTD) ||
12218 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012219 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012220 return(1);
12221 return(0);
12222 }
Daniel Veillard65686452012-07-19 18:25:01 +080012223 return(1);
12224}
12225
12226/**
Owen Taylor3473f882001-02-23 17:55:21 +000012227 * xmlParseChunk:
12228 * @ctxt: an XML parser context
12229 * @chunk: an char array
12230 * @size: the size in byte of the chunk
12231 * @terminate: last chunk indicator
12232 *
12233 * Parse a Chunk of memory
12234 *
12235 * Returns zero if no error, the xmlParserErrors otherwise.
12236 */
12237int
12238xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12239 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012240 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012241 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012242 size_t old_avail = 0;
12243 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012244
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012245 if (ctxt == NULL)
12246 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012247 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012248 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012249 if (ctxt->instate == XML_PARSER_EOF)
12250 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012251 if (ctxt->instate == XML_PARSER_START)
12252 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012253 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12254 (chunk[size - 1] == '\r')) {
12255 end_in_lf = 1;
12256 size--;
12257 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012258
12259xmldecl_done:
12260
Owen Taylor3473f882001-02-23 17:55:21 +000012261 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12262 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012263 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12264 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012265 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012266
Daniel Veillard65686452012-07-19 18:25:01 +080012267 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012268 /*
12269 * Specific handling if we autodetected an encoding, we should not
12270 * push more than the first line ... which depend on the encoding
12271 * And only push the rest once the final encoding was detected
12272 */
12273 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12274 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012275 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012276
12277 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12278 BAD_CAST "UTF-16")) ||
12279 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12280 BAD_CAST "UTF16")))
12281 len = 90;
12282 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12283 BAD_CAST "UCS-4")) ||
12284 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12285 BAD_CAST "UCS4")))
12286 len = 180;
12287
12288 if (ctxt->input->buf->rawconsumed < len)
12289 len -= ctxt->input->buf->rawconsumed;
12290
Raul Hudeaba9716a2010-03-15 10:13:29 +010012291 /*
12292 * Change size for reading the initial declaration only
12293 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12294 * will blindly copy extra bytes from memory.
12295 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012296 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012297 remain = size - len;
12298 size = len;
12299 } else {
12300 remain = 0;
12301 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012302 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012303 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012304 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
William M. Bracka3215c72004-07-31 16:24:01 +000012305 if (res < 0) {
12306 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012307 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012308 return (XML_PARSER_EOF);
12309 }
Owen Taylor3473f882001-02-23 17:55:21 +000012310#ifdef DEBUG_PUSH
12311 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12312#endif
12313
Owen Taylor3473f882001-02-23 17:55:21 +000012314 } else if (ctxt->instate != XML_PARSER_EOF) {
12315 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12316 xmlParserInputBufferPtr in = ctxt->input->buf;
12317 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12318 (in->raw != NULL)) {
12319 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012320 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12321 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012322
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012323 nbchars = xmlCharEncInput(in, terminate);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012324 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012325 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012326 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012327 xmlGenericError(xmlGenericErrorContext,
12328 "xmlParseChunk: encoder error\n");
Nick Wellnhoferab362ab2018-01-22 15:40:05 +010012329 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012330 return(XML_ERR_INVALID_ENCODING);
12331 }
12332 }
12333 }
12334 }
Daniel Veillard65686452012-07-19 18:25:01 +080012335 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012336 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012337 } else {
12338 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12339 avail = xmlBufUse(ctxt->input->buf->buffer);
12340 /*
12341 * Depending on the current state it may not be such
12342 * a good idea to try parsing if there is nothing in the chunk
12343 * which would be worth doing a parser state transition and we
12344 * need to wait for more data
12345 */
12346 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12347 (old_avail == 0) || (avail == 0) ||
12348 (xmlParseCheckTransition(ctxt,
12349 (const char *)&ctxt->input->base[old_avail],
12350 avail - old_avail)))
12351 xmlParseTryOrFinish(ctxt, terminate);
12352 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012353 if (ctxt->instate == XML_PARSER_EOF)
12354 return(ctxt->errNo);
12355
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012356 if ((ctxt->input != NULL) &&
12357 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12358 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12359 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12360 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012361 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012362 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012363 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12364 return(ctxt->errNo);
12365
12366 if (remain != 0) {
12367 chunk += size;
12368 size = remain;
12369 remain = 0;
12370 goto xmldecl_done;
12371 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012372 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12373 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012374 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12375 ctxt->input);
12376 size_t current = ctxt->input->cur - ctxt->input->base;
12377
Daniel Veillarda617e242006-01-09 14:38:44 +000012378 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012379
12380 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12381 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012382 }
Owen Taylor3473f882001-02-23 17:55:21 +000012383 if (terminate) {
12384 /*
12385 * Check for termination
12386 */
Daniel Veillard65686452012-07-19 18:25:01 +080012387 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012388
12389 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012390 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012391 cur_avail = ctxt->input->length -
12392 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012393 else
Daniel Veillard65686452012-07-19 18:25:01 +080012394 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12395 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012396 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012397
Owen Taylor3473f882001-02-23 17:55:21 +000012398 if ((ctxt->instate != XML_PARSER_EOF) &&
12399 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012400 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012401 }
Daniel Veillard65686452012-07-19 18:25:01 +080012402 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012403 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012404 }
Owen Taylor3473f882001-02-23 17:55:21 +000012405 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012406 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012407 ctxt->sax->endDocument(ctxt->userData);
12408 }
12409 ctxt->instate = XML_PARSER_EOF;
12410 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012411 if (ctxt->wellFormed == 0)
12412 return((xmlParserErrors) ctxt->errNo);
12413 else
12414 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012415}
12416
12417/************************************************************************
12418 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012419 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012420 * *
12421 ************************************************************************/
12422
12423/**
Owen Taylor3473f882001-02-23 17:55:21 +000012424 * xmlCreatePushParserCtxt:
12425 * @sax: a SAX handler
12426 * @user_data: The user data returned on SAX callbacks
12427 * @chunk: a pointer to an array of chars
12428 * @size: number of chars in the array
12429 * @filename: an optional file name or URI
12430 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012431 * Create a parser context for using the XML parser in push mode.
12432 * If @buffer and @size are non-NULL, the data is used to detect
12433 * the encoding. The remaining characters will be parsed so they
12434 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012435 * To allow content encoding detection, @size should be >= 4
12436 * The value of @filename is used for fetching external entities
12437 * and error/warning reports.
12438 *
12439 * Returns the new parser context or NULL
12440 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012441
Owen Taylor3473f882001-02-23 17:55:21 +000012442xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012443xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012444 const char *chunk, int size, const char *filename) {
12445 xmlParserCtxtPtr ctxt;
12446 xmlParserInputPtr inputStream;
12447 xmlParserInputBufferPtr buf;
12448 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12449
12450 /*
12451 * plug some encoding conversion routines
12452 */
12453 if ((chunk != NULL) && (size >= 4))
12454 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12455
12456 buf = xmlAllocParserInputBuffer(enc);
12457 if (buf == NULL) return(NULL);
12458
12459 ctxt = xmlNewParserCtxt();
12460 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012461 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012462 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012463 return(NULL);
12464 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012465 ctxt->dictNames = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012466 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012467#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012468 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012469#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012470 xmlFree(ctxt->sax);
12471 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12472 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012473 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012474 xmlFreeParserInputBuffer(buf);
12475 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012476 return(NULL);
12477 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012478 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12479 if (sax->initialized == XML_SAX2_MAGIC)
12480 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12481 else
12482 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012483 if (user_data != NULL)
12484 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012485 }
Owen Taylor3473f882001-02-23 17:55:21 +000012486 if (filename == NULL) {
12487 ctxt->directory = NULL;
12488 } else {
12489 ctxt->directory = xmlParserGetDirectory(filename);
12490 }
12491
12492 inputStream = xmlNewInputStream(ctxt);
12493 if (inputStream == NULL) {
12494 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012495 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012496 return(NULL);
12497 }
12498
12499 if (filename == NULL)
12500 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012501 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012502 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012503 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012504 if (inputStream->filename == NULL) {
12505 xmlFreeParserCtxt(ctxt);
12506 xmlFreeParserInputBuffer(buf);
12507 return(NULL);
12508 }
12509 }
Owen Taylor3473f882001-02-23 17:55:21 +000012510 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012511 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012512 inputPush(ctxt, inputStream);
12513
William M. Brack3a1cd212005-02-11 14:35:54 +000012514 /*
12515 * If the caller didn't provide an initial 'chunk' for determining
12516 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12517 * that it can be automatically determined later
12518 */
12519 if ((size == 0) || (chunk == NULL)) {
12520 ctxt->charset = XML_CHAR_ENCODING_NONE;
12521 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012522 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12523 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012524
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012525 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012526
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012527 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012528#ifdef DEBUG_PUSH
12529 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12530#endif
12531 }
12532
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012533 if (enc != XML_CHAR_ENCODING_NONE) {
12534 xmlSwitchEncoding(ctxt, enc);
12535 }
12536
Owen Taylor3473f882001-02-23 17:55:21 +000012537 return(ctxt);
12538}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012539#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012540
12541/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012542 * xmlHaltParser:
12543 * @ctxt: an XML parser context
12544 *
12545 * Blocks further parser processing don't override error
12546 * for internal use
12547 */
12548static void
12549xmlHaltParser(xmlParserCtxtPtr ctxt) {
12550 if (ctxt == NULL)
12551 return;
12552 ctxt->instate = XML_PARSER_EOF;
12553 ctxt->disableSAX = 1;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012554 while (ctxt->inputNr > 1)
12555 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012556 if (ctxt->input != NULL) {
12557 /*
12558 * in case there was a specific allocation deallocate before
12559 * overriding base
12560 */
12561 if (ctxt->input->free != NULL) {
12562 ctxt->input->free((xmlChar *) ctxt->input->base);
12563 ctxt->input->free = NULL;
12564 }
Elliott Hughes7fbecab2019-01-10 16:42:03 -080012565 if (ctxt->input->buf != NULL) {
12566 xmlFreeParserInputBuffer(ctxt->input->buf);
12567 ctxt->input->buf = NULL;
12568 }
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012569 ctxt->input->cur = BAD_CAST"";
Elliott Hughes7fbecab2019-01-10 16:42:03 -080012570 ctxt->input->length = 0;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012571 ctxt->input->base = ctxt->input->cur;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012572 ctxt->input->end = ctxt->input->cur;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012573 }
12574}
12575
12576/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012577 * xmlStopParser:
12578 * @ctxt: an XML parser context
12579 *
12580 * Blocks further parser processing
12581 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012582void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012583xmlStopParser(xmlParserCtxtPtr ctxt) {
12584 if (ctxt == NULL)
12585 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012586 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012587 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012588}
12589
12590/**
Owen Taylor3473f882001-02-23 17:55:21 +000012591 * xmlCreateIOParserCtxt:
12592 * @sax: a SAX handler
12593 * @user_data: The user data returned on SAX callbacks
12594 * @ioread: an I/O read function
12595 * @ioclose: an I/O close function
12596 * @ioctx: an I/O handler
12597 * @enc: the charset encoding if known
12598 *
12599 * Create a parser context for using the XML parser with an existing
12600 * I/O stream
12601 *
12602 * Returns the new parser context or NULL
12603 */
12604xmlParserCtxtPtr
12605xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12606 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12607 void *ioctx, xmlCharEncoding enc) {
12608 xmlParserCtxtPtr ctxt;
12609 xmlParserInputPtr inputStream;
12610 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012611
Daniel Veillard42595322004-11-08 10:52:06 +000012612 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012613
12614 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012615 if (buf == NULL) {
12616 if (ioclose != NULL)
12617 ioclose(ioctx);
12618 return (NULL);
12619 }
Owen Taylor3473f882001-02-23 17:55:21 +000012620
12621 ctxt = xmlNewParserCtxt();
12622 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012623 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012624 return(NULL);
12625 }
12626 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012627#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012628 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012629#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012630 xmlFree(ctxt->sax);
12631 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12632 if (ctxt->sax == NULL) {
Elliott Hughesecdab2a2022-02-23 14:33:50 -080012633 xmlFreeParserInputBuffer(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012634 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012635 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012636 return(NULL);
12637 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012638 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12639 if (sax->initialized == XML_SAX2_MAGIC)
12640 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12641 else
12642 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012643 if (user_data != NULL)
12644 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012645 }
Owen Taylor3473f882001-02-23 17:55:21 +000012646
12647 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12648 if (inputStream == NULL) {
12649 xmlFreeParserCtxt(ctxt);
12650 return(NULL);
12651 }
12652 inputPush(ctxt, inputStream);
12653
12654 return(ctxt);
12655}
12656
Daniel Veillard4432df22003-09-28 18:58:27 +000012657#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012658/************************************************************************
12659 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012660 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012661 * *
12662 ************************************************************************/
12663
12664/**
12665 * xmlIOParseDTD:
12666 * @sax: the SAX handler block or NULL
12667 * @input: an Input Buffer
12668 * @enc: the charset encoding if known
12669 *
12670 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012671 *
Owen Taylor3473f882001-02-23 17:55:21 +000012672 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012673 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012674 */
12675
12676xmlDtdPtr
12677xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12678 xmlCharEncoding enc) {
12679 xmlDtdPtr ret = NULL;
12680 xmlParserCtxtPtr ctxt;
12681 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012682 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012683
12684 if (input == NULL)
12685 return(NULL);
12686
12687 ctxt = xmlNewParserCtxt();
12688 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012689 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012690 return(NULL);
12691 }
12692
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012693 /* We are loading a DTD */
12694 ctxt->options |= XML_PARSE_DTDLOAD;
12695
Owen Taylor3473f882001-02-23 17:55:21 +000012696 /*
12697 * Set-up the SAX context
12698 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012699 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012700 if (ctxt->sax != NULL)
12701 xmlFree(ctxt->sax);
12702 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012703 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012704 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012705 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012706
12707 /*
12708 * generate a parser input from the I/O handler
12709 */
12710
Daniel Veillard43caefb2003-12-07 19:32:22 +000012711 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012712 if (pinput == NULL) {
12713 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012714 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012715 xmlFreeParserCtxt(ctxt);
12716 return(NULL);
12717 }
12718
12719 /*
12720 * plug some encoding conversion routines here.
12721 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012722 if (xmlPushInput(ctxt, pinput) < 0) {
12723 if (sax != NULL) ctxt->sax = NULL;
12724 xmlFreeParserCtxt(ctxt);
12725 return(NULL);
12726 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012727 if (enc != XML_CHAR_ENCODING_NONE) {
12728 xmlSwitchEncoding(ctxt, enc);
12729 }
Owen Taylor3473f882001-02-23 17:55:21 +000012730
12731 pinput->filename = NULL;
12732 pinput->line = 1;
12733 pinput->col = 1;
12734 pinput->base = ctxt->input->cur;
12735 pinput->cur = ctxt->input->cur;
12736 pinput->free = NULL;
12737
12738 /*
12739 * let's parse that entity knowing it's an external subset.
12740 */
12741 ctxt->inSubset = 2;
12742 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012743 if (ctxt->myDoc == NULL) {
12744 xmlErrMemory(ctxt, "New Doc failed");
12745 return(NULL);
12746 }
12747 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012748 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12749 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012750
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012751 if ((enc == XML_CHAR_ENCODING_NONE) &&
12752 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012753 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012754 * Get the 4 first bytes and decode the charset
12755 * if enc != XML_CHAR_ENCODING_NONE
12756 * plug some encoding conversion routines.
12757 */
12758 start[0] = RAW;
12759 start[1] = NXT(1);
12760 start[2] = NXT(2);
12761 start[3] = NXT(3);
12762 enc = xmlDetectCharEncoding(start, 4);
12763 if (enc != XML_CHAR_ENCODING_NONE) {
12764 xmlSwitchEncoding(ctxt, enc);
12765 }
12766 }
12767
Owen Taylor3473f882001-02-23 17:55:21 +000012768 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12769
12770 if (ctxt->myDoc != NULL) {
12771 if (ctxt->wellFormed) {
12772 ret = ctxt->myDoc->extSubset;
12773 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012774 if (ret != NULL) {
12775 xmlNodePtr tmp;
12776
12777 ret->doc = NULL;
12778 tmp = ret->children;
12779 while (tmp != NULL) {
12780 tmp->doc = NULL;
12781 tmp = tmp->next;
12782 }
12783 }
Owen Taylor3473f882001-02-23 17:55:21 +000012784 } else {
12785 ret = NULL;
12786 }
12787 xmlFreeDoc(ctxt->myDoc);
12788 ctxt->myDoc = NULL;
12789 }
12790 if (sax != NULL) ctxt->sax = NULL;
12791 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012792
Owen Taylor3473f882001-02-23 17:55:21 +000012793 return(ret);
12794}
12795
12796/**
12797 * xmlSAXParseDTD:
12798 * @sax: the SAX handler block
12799 * @ExternalID: a NAME* containing the External ID of the DTD
12800 * @SystemID: a NAME* containing the URL to the DTD
12801 *
12802 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012803 *
Owen Taylor3473f882001-02-23 17:55:21 +000012804 * Returns the resulting xmlDtdPtr or NULL in case of error.
12805 */
12806
12807xmlDtdPtr
12808xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12809 const xmlChar *SystemID) {
12810 xmlDtdPtr ret = NULL;
12811 xmlParserCtxtPtr ctxt;
12812 xmlParserInputPtr input = NULL;
12813 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012814 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012815
12816 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12817
12818 ctxt = xmlNewParserCtxt();
12819 if (ctxt == NULL) {
12820 return(NULL);
12821 }
12822
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012823 /* We are loading a DTD */
12824 ctxt->options |= XML_PARSE_DTDLOAD;
12825
Owen Taylor3473f882001-02-23 17:55:21 +000012826 /*
12827 * Set-up the SAX context
12828 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012829 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012830 if (ctxt->sax != NULL)
12831 xmlFree(ctxt->sax);
12832 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012833 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012834 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012835
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012836 /*
12837 * Canonicalise the system ID
12838 */
12839 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012840 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012841 xmlFreeParserCtxt(ctxt);
12842 return(NULL);
12843 }
Owen Taylor3473f882001-02-23 17:55:21 +000012844
12845 /*
12846 * Ask the Entity resolver to load the damn thing
12847 */
12848
12849 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012850 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12851 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012852 if (input == NULL) {
12853 if (sax != NULL) ctxt->sax = NULL;
12854 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012855 if (systemIdCanonic != NULL)
12856 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012857 return(NULL);
12858 }
12859
12860 /*
12861 * plug some encoding conversion routines here.
12862 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012863 if (xmlPushInput(ctxt, input) < 0) {
12864 if (sax != NULL) ctxt->sax = NULL;
12865 xmlFreeParserCtxt(ctxt);
12866 if (systemIdCanonic != NULL)
12867 xmlFree(systemIdCanonic);
12868 return(NULL);
12869 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012870 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12871 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12872 xmlSwitchEncoding(ctxt, enc);
12873 }
Owen Taylor3473f882001-02-23 17:55:21 +000012874
12875 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012876 input->filename = (char *) systemIdCanonic;
12877 else
12878 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012879 input->line = 1;
12880 input->col = 1;
12881 input->base = ctxt->input->cur;
12882 input->cur = ctxt->input->cur;
12883 input->free = NULL;
12884
12885 /*
12886 * let's parse that entity knowing it's an external subset.
12887 */
12888 ctxt->inSubset = 2;
12889 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012890 if (ctxt->myDoc == NULL) {
12891 xmlErrMemory(ctxt, "New Doc failed");
12892 if (sax != NULL) ctxt->sax = NULL;
12893 xmlFreeParserCtxt(ctxt);
12894 return(NULL);
12895 }
12896 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012897 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12898 ExternalID, SystemID);
12899 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12900
12901 if (ctxt->myDoc != NULL) {
12902 if (ctxt->wellFormed) {
12903 ret = ctxt->myDoc->extSubset;
12904 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012905 if (ret != NULL) {
12906 xmlNodePtr tmp;
12907
12908 ret->doc = NULL;
12909 tmp = ret->children;
12910 while (tmp != NULL) {
12911 tmp->doc = NULL;
12912 tmp = tmp->next;
12913 }
12914 }
Owen Taylor3473f882001-02-23 17:55:21 +000012915 } else {
12916 ret = NULL;
12917 }
12918 xmlFreeDoc(ctxt->myDoc);
12919 ctxt->myDoc = NULL;
12920 }
12921 if (sax != NULL) ctxt->sax = NULL;
12922 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012923
Owen Taylor3473f882001-02-23 17:55:21 +000012924 return(ret);
12925}
12926
Daniel Veillard4432df22003-09-28 18:58:27 +000012927
Owen Taylor3473f882001-02-23 17:55:21 +000012928/**
12929 * xmlParseDTD:
12930 * @ExternalID: a NAME* containing the External ID of the DTD
12931 * @SystemID: a NAME* containing the URL to the DTD
12932 *
12933 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012934 *
Owen Taylor3473f882001-02-23 17:55:21 +000012935 * Returns the resulting xmlDtdPtr or NULL in case of error.
12936 */
12937
12938xmlDtdPtr
12939xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12940 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12941}
Daniel Veillard4432df22003-09-28 18:58:27 +000012942#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012943
12944/************************************************************************
12945 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012946 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012947 * *
12948 ************************************************************************/
12949
12950/**
Owen Taylor3473f882001-02-23 17:55:21 +000012951 * xmlParseCtxtExternalEntity:
12952 * @ctx: the existing parsing context
12953 * @URL: the URL for the entity to load
12954 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012955 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012956 *
12957 * Parse an external general entity within an existing parsing context
12958 * An external general parsed entity is well-formed if it matches the
12959 * production labeled extParsedEnt.
12960 *
12961 * [78] extParsedEnt ::= TextDecl? content
12962 *
12963 * Returns 0 if the entity is well formed, -1 in case of args problem and
12964 * the parser error code otherwise
12965 */
12966
12967int
12968xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012969 const xmlChar *ID, xmlNodePtr *lst) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012970 void *userData;
Owen Taylor3473f882001-02-23 17:55:21 +000012971
Daniel Veillardce682bc2004-11-05 17:22:25 +000012972 if (ctx == NULL) return(-1);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012973 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012974 * If the user provided their own SAX callbacks, then reuse the
12975 * userData callback field, otherwise the expected setup in a
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012976 * DOM builder is to have userData == ctxt
12977 */
12978 if (ctx->userData == ctx)
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012979 userData = NULL;
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012980 else
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012981 userData = ctx->userData;
12982 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12983 userData, ctx->depth + 1,
12984 URL, ID, lst);
Owen Taylor3473f882001-02-23 17:55:21 +000012985}
12986
12987/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012988 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012989 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012990 * @oldctxt: the previous parser context if available
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012991 * @sax: the SAX handler block (possibly NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000012992 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12993 * @depth: Used for loop detection, use 0
12994 * @URL: the URL for the entity to load
12995 * @ID: the System ID for the entity to load
12996 * @list: the return value for the set of parsed nodes
12997 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012998 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012999 *
13000 * Returns 0 if the entity is well formed, -1 in case of args problem and
13001 * the parser error code otherwise
13002 */
13003
Daniel Veillard7d515752003-09-26 19:12:37 +000013004static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013005xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13006 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013007 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013008 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013009 xmlParserCtxtPtr ctxt;
13010 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013011 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013012 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013013 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013014 xmlChar start[4];
13015 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013016
Daniel Veillard0161e632008-08-28 15:36:32 +000013017 if (((depth > 40) &&
13018 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13019 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013020 return(XML_ERR_ENTITY_LOOP);
13021 }
13022
Owen Taylor3473f882001-02-23 17:55:21 +000013023 if (list != NULL)
13024 *list = NULL;
13025 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013026 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013027 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013028 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013029
13030
Rob Richards9c0aa472009-03-26 18:10:19 +000013031 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013032 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013033 ctxt->userData = ctxt;
13034 if (sax != NULL) {
13035 oldsax = ctxt->sax;
13036 ctxt->sax = sax;
13037 if (user_data != NULL)
13038 ctxt->userData = user_data;
13039 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013040 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013041 newDoc = xmlNewDoc(BAD_CAST "1.0");
13042 if (newDoc == NULL) {
13043 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013044 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013045 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013046 newDoc->properties = XML_DOC_INTERNAL;
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013047 if (doc) {
13048 newDoc->intSubset = doc->intSubset;
13049 newDoc->extSubset = doc->extSubset;
13050 if (doc->dict) {
13051 newDoc->dict = doc->dict;
13052 xmlDictReference(newDoc->dict);
13053 }
13054 if (doc->URL != NULL) {
13055 newDoc->URL = xmlStrdup(doc->URL);
13056 }
Owen Taylor3473f882001-02-23 17:55:21 +000013057 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013058 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13059 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013060 if (sax != NULL)
13061 ctxt->sax = oldsax;
13062 xmlFreeParserCtxt(ctxt);
13063 newDoc->intSubset = NULL;
13064 newDoc->extSubset = NULL;
13065 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013066 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013067 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013068 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013069 nodePush(ctxt, newDoc->children);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013070 if (doc == NULL) {
13071 ctxt->myDoc = newDoc;
13072 } else {
13073 ctxt->myDoc = doc;
13074 newRoot->doc = doc;
13075 }
Owen Taylor3473f882001-02-23 17:55:21 +000013076
Daniel Veillard0161e632008-08-28 15:36:32 +000013077 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013078 * Get the 4 first bytes and decode the charset
13079 * if enc != XML_CHAR_ENCODING_NONE
13080 * plug some encoding conversion routines.
13081 */
13082 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013083 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13084 start[0] = RAW;
13085 start[1] = NXT(1);
13086 start[2] = NXT(2);
13087 start[3] = NXT(3);
13088 enc = xmlDetectCharEncoding(start, 4);
13089 if (enc != XML_CHAR_ENCODING_NONE) {
13090 xmlSwitchEncoding(ctxt, enc);
13091 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013092 }
13093
Owen Taylor3473f882001-02-23 17:55:21 +000013094 /*
13095 * Parse a possible text declaration first
13096 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013097 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013098 xmlParseTextDecl(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013099 /*
13100 * An XML-1.0 document can't reference an entity not XML-1.0
13101 */
13102 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13103 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13104 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13105 "Version mismatch between document and entity\n");
13106 }
Owen Taylor3473f882001-02-23 17:55:21 +000013107 }
13108
Owen Taylor3473f882001-02-23 17:55:21 +000013109 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013110 ctxt->depth = depth;
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013111 if (oldctxt != NULL) {
13112 ctxt->_private = oldctxt->_private;
13113 ctxt->loadsubset = oldctxt->loadsubset;
13114 ctxt->validate = oldctxt->validate;
13115 ctxt->valid = oldctxt->valid;
13116 ctxt->replaceEntities = oldctxt->replaceEntities;
13117 if (oldctxt->validate) {
13118 ctxt->vctxt.error = oldctxt->vctxt.error;
13119 ctxt->vctxt.warning = oldctxt->vctxt.warning;
13120 ctxt->vctxt.userData = oldctxt->vctxt.userData;
13121 }
13122 ctxt->external = oldctxt->external;
13123 if (ctxt->dict) xmlDictFree(ctxt->dict);
13124 ctxt->dict = oldctxt->dict;
13125 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13126 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13127 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13128 ctxt->dictNames = oldctxt->dictNames;
13129 ctxt->attsDefault = oldctxt->attsDefault;
13130 ctxt->attsSpecial = oldctxt->attsSpecial;
13131 ctxt->linenumbers = oldctxt->linenumbers;
13132 ctxt->record_info = oldctxt->record_info;
13133 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13134 ctxt->node_seq.length = oldctxt->node_seq.length;
13135 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13136 } else {
13137 /*
13138 * Doing validity checking on chunk without context
13139 * doesn't make sense
13140 */
13141 ctxt->_private = NULL;
13142 ctxt->validate = 0;
13143 ctxt->external = 2;
13144 ctxt->loadsubset = 0;
13145 }
Owen Taylor3473f882001-02-23 17:55:21 +000013146
13147 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013148
Daniel Veillard561b7f82002-03-20 21:55:57 +000013149 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013150 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013151 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013152 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013153 }
13154 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013155 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013156 }
13157
13158 if (!ctxt->wellFormed) {
13159 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013160 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013161 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013162 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013163 } else {
13164 if (list != NULL) {
13165 xmlNodePtr cur;
13166
13167 /*
13168 * Return the newly created nodeset after unlinking it from
13169 * they pseudo parent.
13170 */
13171 cur = newDoc->children->children;
13172 *list = cur;
13173 while (cur != NULL) {
13174 cur->parent = NULL;
13175 cur = cur->next;
13176 }
13177 newDoc->children->children = NULL;
13178 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013179 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013180 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013181
13182 /*
13183 * Record in the parent context the number of entities replacement
13184 * done when parsing that reference.
13185 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013186 if (oldctxt != NULL)
13187 oldctxt->nbentities += ctxt->nbentities;
13188
Daniel Veillard0161e632008-08-28 15:36:32 +000013189 /*
13190 * Also record the size of the entity parsed
13191 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013192 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013193 oldctxt->sizeentities += ctxt->input->consumed;
13194 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13195 }
13196 /*
13197 * And record the last error if any
13198 */
Nick Wellnhofer3eef3f32017-06-20 16:13:57 +020013199 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
Daniel Veillard0161e632008-08-28 15:36:32 +000013200 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13201
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013202 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013203 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013204 if (oldctxt != NULL) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013205 ctxt->dict = NULL;
13206 ctxt->attsDefault = NULL;
13207 ctxt->attsSpecial = NULL;
13208 oldctxt->validate = ctxt->validate;
13209 oldctxt->valid = ctxt->valid;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013210 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13211 oldctxt->node_seq.length = ctxt->node_seq.length;
13212 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13213 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013214 ctxt->node_seq.maximum = 0;
13215 ctxt->node_seq.length = 0;
13216 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013217 xmlFreeParserCtxt(ctxt);
13218 newDoc->intSubset = NULL;
13219 newDoc->extSubset = NULL;
13220 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013221
Owen Taylor3473f882001-02-23 17:55:21 +000013222 return(ret);
13223}
13224
Daniel Veillard81273902003-09-30 00:43:48 +000013225#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013226/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013227 * xmlParseExternalEntity:
13228 * @doc: the document the chunk pertains to
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013229 * @sax: the SAX handler block (possibly NULL)
Daniel Veillard257d9102001-05-08 10:41:44 +000013230 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13231 * @depth: Used for loop detection, use 0
13232 * @URL: the URL for the entity to load
13233 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013234 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013235 *
13236 * Parse an external general entity
13237 * An external general parsed entity is well-formed if it matches the
13238 * production labeled extParsedEnt.
13239 *
13240 * [78] extParsedEnt ::= TextDecl? content
13241 *
13242 * Returns 0 if the entity is well formed, -1 in case of args problem and
13243 * the parser error code otherwise
13244 */
13245
13246int
13247xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013248 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013249 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013250 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013251}
13252
13253/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013254 * xmlParseBalancedChunkMemory:
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013255 * @doc: the document the chunk pertains to (must not be NULL)
13256 * @sax: the SAX handler block (possibly NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013257 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13258 * @depth: Used for loop detection, use 0
13259 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013260 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013261 *
13262 * Parse a well-balanced chunk of an XML document
13263 * called by the parser
13264 * The allowed sequence for the Well Balanced Chunk is the one defined by
13265 * the content production in the XML grammar:
13266 *
13267 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13268 *
13269 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13270 * the parser error code otherwise
13271 */
13272
13273int
13274xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013275 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013276 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13277 depth, string, lst, 0 );
13278}
Daniel Veillard81273902003-09-30 00:43:48 +000013279#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013280
13281/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013282 * xmlParseBalancedChunkMemoryInternal:
13283 * @oldctxt: the existing parsing context
13284 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13285 * @user_data: the user data field for the parser context
13286 * @lst: the return value for the set of parsed nodes
13287 *
13288 *
13289 * Parse a well-balanced chunk of an XML document
13290 * called by the parser
13291 * The allowed sequence for the Well Balanced Chunk is the one defined by
13292 * the content production in the XML grammar:
13293 *
13294 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13295 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013296 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13297 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013298 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013299 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013300 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013301 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013302static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013303xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13304 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13305 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013306 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013307 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013308 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013309 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013310 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013311 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013312 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013313#ifdef SAX2
13314 int i;
13315#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013316
Daniel Veillard0161e632008-08-28 15:36:32 +000013317 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13318 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013319 return(XML_ERR_ENTITY_LOOP);
13320 }
13321
13322
13323 if (lst != NULL)
13324 *lst = NULL;
13325 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013326 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013327
13328 size = xmlStrlen(string);
13329
13330 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013331 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013332 if (user_data != NULL)
13333 ctxt->userData = user_data;
13334 else
13335 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013336 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13337 ctxt->dict = oldctxt->dict;
Daniel Veillardad88b542017-12-08 09:42:31 +010013338 ctxt->input_id = oldctxt->input_id + 1;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013339 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13340 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13341 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013342
Daniel Veillard74eaec12009-08-26 15:57:20 +020013343#ifdef SAX2
13344 /* propagate namespaces down the entity */
13345 for (i = 0;i < oldctxt->nsNr;i += 2) {
13346 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13347 }
13348#endif
13349
Daniel Veillard328f48c2002-11-15 15:24:34 +000013350 oldsax = ctxt->sax;
13351 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013352 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013353 ctxt->replaceEntities = oldctxt->replaceEntities;
13354 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013355
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013356 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013357 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013358 newDoc = xmlNewDoc(BAD_CAST "1.0");
13359 if (newDoc == NULL) {
13360 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013361 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013362 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013363 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013364 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013365 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013366 newDoc->dict = ctxt->dict;
13367 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013368 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013369 } else {
13370 ctxt->myDoc = oldctxt->myDoc;
13371 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013372 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013373 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013374 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13375 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013376 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013377 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013378 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013379 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013380 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013381 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013382 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013383 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013384 ctxt->myDoc->children = NULL;
13385 ctxt->myDoc->last = NULL;
13386 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013387 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013388 ctxt->instate = XML_PARSER_CONTENT;
13389 ctxt->depth = oldctxt->depth + 1;
13390
Daniel Veillard328f48c2002-11-15 15:24:34 +000013391 ctxt->validate = 0;
13392 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013393 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13394 /*
13395 * ID/IDREF registration will be done in xmlValidateElement below
13396 */
13397 ctxt->loadsubset |= XML_SKIP_IDS;
13398 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013399 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013400 ctxt->attsDefault = oldctxt->attsDefault;
13401 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013402
Daniel Veillard68e9e742002-11-16 15:35:11 +000013403 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013404 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013405 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013406 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013407 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013408 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013409 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013410 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013411 }
13412
13413 if (!ctxt->wellFormed) {
13414 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013415 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013416 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013417 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013418 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013419 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013420 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013421
William M. Brack7b9154b2003-09-27 19:23:50 +000013422 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013423 xmlNodePtr cur;
13424
13425 /*
13426 * Return the newly created nodeset after unlinking it from
13427 * they pseudo parent.
13428 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013429 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013430 *lst = cur;
13431 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013432#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013433 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13434 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13435 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013436 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13437 oldctxt->myDoc, cur);
13438 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013439#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013440 cur->parent = NULL;
13441 cur = cur->next;
13442 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013443 ctxt->myDoc->children->children = NULL;
13444 }
13445 if (ctxt->myDoc != NULL) {
13446 xmlFreeNode(ctxt->myDoc->children);
13447 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013448 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013449 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013450
13451 /*
13452 * Record in the parent context the number of entities replacement
13453 * done when parsing that reference.
13454 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013455 if (oldctxt != NULL)
13456 oldctxt->nbentities += ctxt->nbentities;
13457
Daniel Veillard0161e632008-08-28 15:36:32 +000013458 /*
13459 * Also record the last error if any
13460 */
13461 if (ctxt->lastError.code != XML_ERR_OK)
13462 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13463
Daniel Veillard328f48c2002-11-15 15:24:34 +000013464 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013465 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013466 ctxt->attsDefault = NULL;
13467 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013468 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013469 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013470 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013471 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013472
Daniel Veillard328f48c2002-11-15 15:24:34 +000013473 return(ret);
13474}
13475
Daniel Veillard29b17482004-08-16 00:39:03 +000013476/**
13477 * xmlParseInNodeContext:
13478 * @node: the context node
13479 * @data: the input string
13480 * @datalen: the input string length in bytes
13481 * @options: a combination of xmlParserOption
13482 * @lst: the return value for the set of parsed nodes
13483 *
13484 * Parse a well-balanced chunk of an XML document
13485 * within the context (DTD, namespaces, etc ...) of the given node.
13486 *
13487 * The allowed sequence for the data is a Well Balanced Chunk defined by
13488 * the content production in the XML grammar:
13489 *
13490 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13491 *
13492 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13493 * error code otherwise
13494 */
13495xmlParserErrors
13496xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13497 int options, xmlNodePtr *lst) {
13498#ifdef SAX2
13499 xmlParserCtxtPtr ctxt;
13500 xmlDocPtr doc = NULL;
13501 xmlNodePtr fake, cur;
13502 int nsnr = 0;
13503
13504 xmlParserErrors ret = XML_ERR_OK;
13505
13506 /*
13507 * check all input parameters, grab the document
13508 */
13509 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13510 return(XML_ERR_INTERNAL_ERROR);
13511 switch (node->type) {
13512 case XML_ELEMENT_NODE:
13513 case XML_ATTRIBUTE_NODE:
13514 case XML_TEXT_NODE:
13515 case XML_CDATA_SECTION_NODE:
13516 case XML_ENTITY_REF_NODE:
13517 case XML_PI_NODE:
13518 case XML_COMMENT_NODE:
13519 case XML_DOCUMENT_NODE:
13520 case XML_HTML_DOCUMENT_NODE:
13521 break;
13522 default:
13523 return(XML_ERR_INTERNAL_ERROR);
13524
13525 }
13526 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13527 (node->type != XML_DOCUMENT_NODE) &&
13528 (node->type != XML_HTML_DOCUMENT_NODE))
13529 node = node->parent;
13530 if (node == NULL)
13531 return(XML_ERR_INTERNAL_ERROR);
13532 if (node->type == XML_ELEMENT_NODE)
13533 doc = node->doc;
13534 else
13535 doc = (xmlDocPtr) node;
13536 if (doc == NULL)
13537 return(XML_ERR_INTERNAL_ERROR);
13538
13539 /*
13540 * allocate a context and set-up everything not related to the
13541 * node position in the tree
13542 */
13543 if (doc->type == XML_DOCUMENT_NODE)
13544 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13545#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013546 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013547 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013548 /*
13549 * When parsing in context, it makes no sense to add implied
13550 * elements like html/body/etc...
13551 */
13552 options |= HTML_PARSE_NOIMPLIED;
13553 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013554#endif
13555 else
13556 return(XML_ERR_INTERNAL_ERROR);
13557
13558 if (ctxt == NULL)
13559 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013560
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013561 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013562 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13563 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13564 * we must wait until the last moment to free the original one.
13565 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013566 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013567 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013568 xmlDictFree(ctxt->dict);
13569 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013570 } else
13571 options |= XML_PARSE_NODICT;
13572
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013573 if (doc->encoding != NULL) {
13574 xmlCharEncodingHandlerPtr hdlr;
13575
13576 if (ctxt->encoding != NULL)
13577 xmlFree((xmlChar *) ctxt->encoding);
13578 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13579
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013580 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013581 if (hdlr != NULL) {
13582 xmlSwitchToEncoding(ctxt, hdlr);
13583 } else {
13584 return(XML_ERR_UNSUPPORTED_ENCODING);
13585 }
13586 }
13587
Daniel Veillard37334572008-07-31 08:20:02 +000013588 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013589 xmlDetectSAX2(ctxt);
13590 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013591 /* parsing in context, i.e. as within existing content */
Daniel Veillardad88b542017-12-08 09:42:31 +010013592 ctxt->input_id = 2;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013593 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013594
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013595 fake = xmlNewComment(NULL);
13596 if (fake == NULL) {
13597 xmlFreeParserCtxt(ctxt);
13598 return(XML_ERR_NO_MEMORY);
13599 }
13600 xmlAddChild(node, fake);
13601
Daniel Veillard29b17482004-08-16 00:39:03 +000013602 if (node->type == XML_ELEMENT_NODE) {
13603 nodePush(ctxt, node);
13604 /*
13605 * initialize the SAX2 namespaces stack
13606 */
13607 cur = node;
13608 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13609 xmlNsPtr ns = cur->nsDef;
13610 const xmlChar *iprefix, *ihref;
13611
13612 while (ns != NULL) {
13613 if (ctxt->dict) {
13614 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13615 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13616 } else {
13617 iprefix = ns->prefix;
13618 ihref = ns->href;
13619 }
13620
13621 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13622 nsPush(ctxt, iprefix, ihref);
13623 nsnr++;
13624 }
13625 ns = ns->next;
13626 }
13627 cur = cur->parent;
13628 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013629 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013630
13631 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13632 /*
13633 * ID/IDREF registration will be done in xmlValidateElement below
13634 */
13635 ctxt->loadsubset |= XML_SKIP_IDS;
13636 }
13637
Daniel Veillard499cc922006-01-18 17:22:35 +000013638#ifdef LIBXML_HTML_ENABLED
13639 if (doc->type == XML_HTML_DOCUMENT_NODE)
13640 __htmlParseContent(ctxt);
13641 else
13642#endif
13643 xmlParseContent(ctxt);
13644
Daniel Veillard29b17482004-08-16 00:39:03 +000013645 nsPop(ctxt, nsnr);
13646 if ((RAW == '<') && (NXT(1) == '/')) {
13647 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13648 } else if (RAW != 0) {
13649 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13650 }
13651 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13652 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13653 ctxt->wellFormed = 0;
13654 }
13655
13656 if (!ctxt->wellFormed) {
13657 if (ctxt->errNo == 0)
13658 ret = XML_ERR_INTERNAL_ERROR;
13659 else
13660 ret = (xmlParserErrors)ctxt->errNo;
13661 } else {
13662 ret = XML_ERR_OK;
13663 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013664
Daniel Veillard29b17482004-08-16 00:39:03 +000013665 /*
13666 * Return the newly created nodeset after unlinking it from
13667 * the pseudo sibling.
13668 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013669
Daniel Veillard29b17482004-08-16 00:39:03 +000013670 cur = fake->next;
13671 fake->next = NULL;
13672 node->last = fake;
13673
13674 if (cur != NULL) {
13675 cur->prev = NULL;
13676 }
13677
13678 *lst = cur;
13679
13680 while (cur != NULL) {
13681 cur->parent = NULL;
13682 cur = cur->next;
13683 }
13684
13685 xmlUnlinkNode(fake);
13686 xmlFreeNode(fake);
13687
13688
13689 if (ret != XML_ERR_OK) {
13690 xmlFreeNodeList(*lst);
13691 *lst = NULL;
13692 }
William M. Brackc3f81342004-10-03 01:22:44 +000013693
William M. Brackb7b54de2004-10-06 16:38:01 +000013694 if (doc->dict != NULL)
13695 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013696 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013697
Daniel Veillard29b17482004-08-16 00:39:03 +000013698 return(ret);
13699#else /* !SAX2 */
13700 return(XML_ERR_INTERNAL_ERROR);
13701#endif
13702}
13703
Daniel Veillard81273902003-09-30 00:43:48 +000013704#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013705/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013706 * xmlParseBalancedChunkMemoryRecover:
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013707 * @doc: the document the chunk pertains to (must not be NULL)
13708 * @sax: the SAX handler block (possibly NULL)
Daniel Veillard58e44c92002-08-02 22:19:49 +000013709 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13710 * @depth: Used for loop detection, use 0
13711 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13712 * @lst: the return value for the set of parsed nodes
13713 * @recover: return nodes even if the data is broken (use 0)
13714 *
13715 *
13716 * Parse a well-balanced chunk of an XML document
13717 * called by the parser
13718 * The allowed sequence for the Well Balanced Chunk is the one defined by
13719 * the content production in the XML grammar:
13720 *
13721 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13722 *
13723 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13724 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013725 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013726 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013727 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13728 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013729 */
13730int
13731xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013732 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013733 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013734 xmlParserCtxtPtr ctxt;
13735 xmlDocPtr newDoc;
13736 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013737 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013738 int size;
13739 int ret = 0;
13740
Daniel Veillard0161e632008-08-28 15:36:32 +000013741 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013742 return(XML_ERR_ENTITY_LOOP);
13743 }
13744
13745
Daniel Veillardcda96922001-08-21 10:56:31 +000013746 if (lst != NULL)
13747 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013748 if (string == NULL)
13749 return(-1);
13750
13751 size = xmlStrlen(string);
13752
13753 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13754 if (ctxt == NULL) return(-1);
13755 ctxt->userData = ctxt;
13756 if (sax != NULL) {
13757 oldsax = ctxt->sax;
13758 ctxt->sax = sax;
13759 if (user_data != NULL)
13760 ctxt->userData = user_data;
13761 }
13762 newDoc = xmlNewDoc(BAD_CAST "1.0");
13763 if (newDoc == NULL) {
13764 xmlFreeParserCtxt(ctxt);
13765 return(-1);
13766 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013767 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013768 if ((doc != NULL) && (doc->dict != NULL)) {
13769 xmlDictFree(ctxt->dict);
13770 ctxt->dict = doc->dict;
13771 xmlDictReference(ctxt->dict);
13772 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13773 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13774 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13775 ctxt->dictNames = 1;
13776 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013777 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013778 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013779 /* doc == NULL is only supported for historic reasons */
Owen Taylor3473f882001-02-23 17:55:21 +000013780 if (doc != NULL) {
13781 newDoc->intSubset = doc->intSubset;
13782 newDoc->extSubset = doc->extSubset;
13783 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013784 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13785 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013786 if (sax != NULL)
13787 ctxt->sax = oldsax;
13788 xmlFreeParserCtxt(ctxt);
13789 newDoc->intSubset = NULL;
13790 newDoc->extSubset = NULL;
13791 xmlFreeDoc(newDoc);
13792 return(-1);
13793 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013794 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13795 nodePush(ctxt, newRoot);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013796 /* doc == NULL is only supported for historic reasons */
Owen Taylor3473f882001-02-23 17:55:21 +000013797 if (doc == NULL) {
13798 ctxt->myDoc = newDoc;
13799 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013800 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013801 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013802 /* Ensure that doc has XML spec namespace */
13803 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13804 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013805 }
13806 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardad88b542017-12-08 09:42:31 +010013807 ctxt->input_id = 2;
Owen Taylor3473f882001-02-23 17:55:21 +000013808 ctxt->depth = depth;
13809
13810 /*
13811 * Doing validity checking on chunk doesn't make sense
13812 */
13813 ctxt->validate = 0;
13814 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013815 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013816
Daniel Veillardb39bc392002-10-26 19:29:51 +000013817 if ( doc != NULL ){
13818 content = doc->children;
13819 doc->children = NULL;
13820 xmlParseContent(ctxt);
13821 doc->children = content;
13822 }
13823 else {
13824 xmlParseContent(ctxt);
13825 }
Owen Taylor3473f882001-02-23 17:55:21 +000013826 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013827 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013828 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013829 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013830 }
13831 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013832 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013833 }
13834
13835 if (!ctxt->wellFormed) {
13836 if (ctxt->errNo == 0)
13837 ret = 1;
13838 else
13839 ret = ctxt->errNo;
13840 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013841 ret = 0;
13842 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013843
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013844 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13845 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013846
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013847 /*
13848 * Return the newly created nodeset after unlinking it from
13849 * they pseudo parent.
13850 */
13851 cur = newDoc->children->children;
13852 *lst = cur;
13853 while (cur != NULL) {
13854 xmlSetTreeDoc(cur, doc);
13855 cur->parent = NULL;
13856 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013857 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013858 newDoc->children->children = NULL;
13859 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013860
13861 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013862 ctxt->sax = oldsax;
13863 xmlFreeParserCtxt(ctxt);
13864 newDoc->intSubset = NULL;
13865 newDoc->extSubset = NULL;
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013866 /* This leaks the namespace list if doc == NULL */
Rob Richardsa02f1992006-09-16 14:04:26 +000013867 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013868 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013869
Owen Taylor3473f882001-02-23 17:55:21 +000013870 return(ret);
13871}
13872
13873/**
13874 * xmlSAXParseEntity:
13875 * @sax: the SAX handler block
13876 * @filename: the filename
13877 *
13878 * parse an XML external entity out of context and build a tree.
13879 * It use the given SAX function block to handle the parsing callback.
13880 * If sax is NULL, fallback to the default DOM tree building routines.
13881 *
13882 * [78] extParsedEnt ::= TextDecl? content
13883 *
13884 * This correspond to a "Well Balanced" chunk
13885 *
13886 * Returns the resulting document tree
13887 */
13888
13889xmlDocPtr
13890xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13891 xmlDocPtr ret;
13892 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013893
13894 ctxt = xmlCreateFileParserCtxt(filename);
13895 if (ctxt == NULL) {
13896 return(NULL);
13897 }
13898 if (sax != NULL) {
13899 if (ctxt->sax != NULL)
13900 xmlFree(ctxt->sax);
13901 ctxt->sax = sax;
13902 ctxt->userData = NULL;
13903 }
13904
Owen Taylor3473f882001-02-23 17:55:21 +000013905 xmlParseExtParsedEnt(ctxt);
13906
13907 if (ctxt->wellFormed)
13908 ret = ctxt->myDoc;
13909 else {
13910 ret = NULL;
13911 xmlFreeDoc(ctxt->myDoc);
13912 ctxt->myDoc = NULL;
13913 }
13914 if (sax != NULL)
13915 ctxt->sax = NULL;
13916 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013917
Owen Taylor3473f882001-02-23 17:55:21 +000013918 return(ret);
13919}
13920
13921/**
13922 * xmlParseEntity:
13923 * @filename: the filename
13924 *
13925 * parse an XML external entity out of context and build a tree.
13926 *
13927 * [78] extParsedEnt ::= TextDecl? content
13928 *
13929 * This correspond to a "Well Balanced" chunk
13930 *
13931 * Returns the resulting document tree
13932 */
13933
13934xmlDocPtr
13935xmlParseEntity(const char *filename) {
13936 return(xmlSAXParseEntity(NULL, filename));
13937}
Daniel Veillard81273902003-09-30 00:43:48 +000013938#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013939
13940/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013941 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013942 * @URL: the entity URL
13943 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013944 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013945 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013946 *
13947 * Create a parser context for an external entity
13948 * Automatic support for ZLIB/Compress compressed document is provided
13949 * by default if found at compile-time.
13950 *
13951 * Returns the new parser context or NULL
13952 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013953static xmlParserCtxtPtr
13954xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13955 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013956 xmlParserCtxtPtr ctxt;
13957 xmlParserInputPtr inputStream;
13958 char *directory = NULL;
13959 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013960
Owen Taylor3473f882001-02-23 17:55:21 +000013961 ctxt = xmlNewParserCtxt();
13962 if (ctxt == NULL) {
13963 return(NULL);
13964 }
13965
Daniel Veillard48247b42009-07-10 16:12:46 +020013966 if (pctx != NULL) {
13967 ctxt->options = pctx->options;
13968 ctxt->_private = pctx->_private;
Daniel Veillardad88b542017-12-08 09:42:31 +010013969 /*
13970 * this is a subparser of pctx, so the input_id should be
13971 * incremented to distinguish from main entity
13972 */
13973 ctxt->input_id = pctx->input_id + 1;
Rob Richards9c0aa472009-03-26 18:10:19 +000013974 }
13975
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013976 /* Don't read from stdin. */
13977 if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13978 URL = BAD_CAST "./-";
13979
Owen Taylor3473f882001-02-23 17:55:21 +000013980 uri = xmlBuildURI(URL, base);
13981
13982 if (uri == NULL) {
13983 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13984 if (inputStream == NULL) {
13985 xmlFreeParserCtxt(ctxt);
13986 return(NULL);
13987 }
13988
13989 inputPush(ctxt, inputStream);
13990
13991 if ((ctxt->directory == NULL) && (directory == NULL))
13992 directory = xmlParserGetDirectory((char *)URL);
13993 if ((ctxt->directory == NULL) && (directory != NULL))
13994 ctxt->directory = directory;
13995 } else {
13996 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13997 if (inputStream == NULL) {
13998 xmlFree(uri);
13999 xmlFreeParserCtxt(ctxt);
14000 return(NULL);
14001 }
14002
14003 inputPush(ctxt, inputStream);
14004
14005 if ((ctxt->directory == NULL) && (directory == NULL))
14006 directory = xmlParserGetDirectory((char *)uri);
14007 if ((ctxt->directory == NULL) && (directory != NULL))
14008 ctxt->directory = directory;
14009 xmlFree(uri);
14010 }
Owen Taylor3473f882001-02-23 17:55:21 +000014011 return(ctxt);
14012}
14013
Rob Richards9c0aa472009-03-26 18:10:19 +000014014/**
14015 * xmlCreateEntityParserCtxt:
14016 * @URL: the entity URL
14017 * @ID: the entity PUBLIC ID
14018 * @base: a possible base for the target URI
14019 *
14020 * Create a parser context for an external entity
14021 * Automatic support for ZLIB/Compress compressed document is provided
14022 * by default if found at compile-time.
14023 *
14024 * Returns the new parser context or NULL
14025 */
14026xmlParserCtxtPtr
14027xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14028 const xmlChar *base) {
14029 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14030
14031}
14032
Owen Taylor3473f882001-02-23 17:55:21 +000014033/************************************************************************
14034 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014035 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014036 * *
14037 ************************************************************************/
14038
14039/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014040 * xmlCreateURLParserCtxt:
14041 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014042 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014043 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014044 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014045 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014046 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014047 *
14048 * Returns the new parser context or NULL
14049 */
14050xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014051xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014052{
14053 xmlParserCtxtPtr ctxt;
14054 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014055 char *directory = NULL;
14056
Owen Taylor3473f882001-02-23 17:55:21 +000014057 ctxt = xmlNewParserCtxt();
14058 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014059 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014060 return(NULL);
14061 }
14062
Daniel Veillarddf292f72005-01-16 19:00:15 +000014063 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014064 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014065 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014066
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014067 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014068 if (inputStream == NULL) {
14069 xmlFreeParserCtxt(ctxt);
14070 return(NULL);
14071 }
14072
Owen Taylor3473f882001-02-23 17:55:21 +000014073 inputPush(ctxt, inputStream);
14074 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014075 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014076 if ((ctxt->directory == NULL) && (directory != NULL))
14077 ctxt->directory = directory;
14078
14079 return(ctxt);
14080}
14081
Daniel Veillard61b93382003-11-03 14:28:31 +000014082/**
14083 * xmlCreateFileParserCtxt:
14084 * @filename: the filename
14085 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014086 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014087 * Automatic support for ZLIB/Compress compressed document is provided
14088 * by default if found at compile-time.
14089 *
14090 * Returns the new parser context or NULL
14091 */
14092xmlParserCtxtPtr
14093xmlCreateFileParserCtxt(const char *filename)
14094{
14095 return(xmlCreateURLParserCtxt(filename, 0));
14096}
14097
Daniel Veillard81273902003-09-30 00:43:48 +000014098#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014099/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014100 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014101 * @sax: the SAX handler block
14102 * @filename: the filename
14103 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14104 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014105 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014106 *
14107 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14108 * compressed document is provided by default if found at compile-time.
14109 * It use the given SAX function block to handle the parsing callback.
14110 * If sax is NULL, fallback to the default DOM tree building routines.
14111 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014112 * User data (void *) is stored within the parser context in the
14113 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014114 *
Owen Taylor3473f882001-02-23 17:55:21 +000014115 * Returns the resulting document tree
14116 */
14117
14118xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014119xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14120 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014121 xmlDocPtr ret;
14122 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014123
Daniel Veillard635ef722001-10-29 11:48:19 +000014124 xmlInitParser();
14125
Owen Taylor3473f882001-02-23 17:55:21 +000014126 ctxt = xmlCreateFileParserCtxt(filename);
14127 if (ctxt == NULL) {
14128 return(NULL);
14129 }
14130 if (sax != NULL) {
14131 if (ctxt->sax != NULL)
14132 xmlFree(ctxt->sax);
14133 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014134 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014135 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014136 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014137 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014138 }
Owen Taylor3473f882001-02-23 17:55:21 +000014139
Daniel Veillard37d2d162008-03-14 10:54:00 +000014140 if (ctxt->directory == NULL)
14141 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014142
Daniel Veillarddad3f682002-11-17 16:47:27 +000014143 ctxt->recovery = recovery;
14144
Owen Taylor3473f882001-02-23 17:55:21 +000014145 xmlParseDocument(ctxt);
14146
William M. Brackc07329e2003-09-08 01:57:30 +000014147 if ((ctxt->wellFormed) || recovery) {
14148 ret = ctxt->myDoc;
Haibo Huangd23e46c2020-10-28 22:26:09 -070014149 if ((ret != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014150 if (ctxt->input->buf->compressed > 0)
14151 ret->compression = 9;
14152 else
14153 ret->compression = ctxt->input->buf->compressed;
14154 }
William M. Brackc07329e2003-09-08 01:57:30 +000014155 }
Owen Taylor3473f882001-02-23 17:55:21 +000014156 else {
14157 ret = NULL;
14158 xmlFreeDoc(ctxt->myDoc);
14159 ctxt->myDoc = NULL;
14160 }
14161 if (sax != NULL)
14162 ctxt->sax = NULL;
14163 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014164
Owen Taylor3473f882001-02-23 17:55:21 +000014165 return(ret);
14166}
14167
14168/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014169 * xmlSAXParseFile:
14170 * @sax: the SAX handler block
14171 * @filename: the filename
14172 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14173 * documents
14174 *
14175 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14176 * compressed document is provided by default if found at compile-time.
14177 * It use the given SAX function block to handle the parsing callback.
14178 * If sax is NULL, fallback to the default DOM tree building routines.
14179 *
14180 * Returns the resulting document tree
14181 */
14182
14183xmlDocPtr
14184xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14185 int recovery) {
14186 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14187}
14188
14189/**
Owen Taylor3473f882001-02-23 17:55:21 +000014190 * xmlRecoverDoc:
14191 * @cur: a pointer to an array of xmlChar
14192 *
14193 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014194 * In the case the document is not Well Formed, a attempt to build a
14195 * tree is tried anyway
14196 *
14197 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014198 */
14199
14200xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014201xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014202 return(xmlSAXParseDoc(NULL, cur, 1));
14203}
14204
14205/**
14206 * xmlParseFile:
14207 * @filename: the filename
14208 *
14209 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14210 * compressed document is provided by default if found at compile-time.
14211 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014212 * Returns the resulting document tree if the file was wellformed,
14213 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014214 */
14215
14216xmlDocPtr
14217xmlParseFile(const char *filename) {
14218 return(xmlSAXParseFile(NULL, filename, 0));
14219}
14220
14221/**
14222 * xmlRecoverFile:
14223 * @filename: the filename
14224 *
14225 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14226 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014227 * In the case the document is not Well Formed, it attempts to build
14228 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014229 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014230 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014231 */
14232
14233xmlDocPtr
14234xmlRecoverFile(const char *filename) {
14235 return(xmlSAXParseFile(NULL, filename, 1));
14236}
14237
14238
14239/**
14240 * xmlSetupParserForBuffer:
14241 * @ctxt: an XML parser context
14242 * @buffer: a xmlChar * buffer
14243 * @filename: a file name
14244 *
14245 * Setup the parser context to parse a new buffer; Clears any prior
14246 * contents from the parser context. The buffer parameter must not be
14247 * NULL, but the filename parameter can be
14248 */
14249void
14250xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14251 const char* filename)
14252{
14253 xmlParserInputPtr input;
14254
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014255 if ((ctxt == NULL) || (buffer == NULL))
14256 return;
14257
Owen Taylor3473f882001-02-23 17:55:21 +000014258 input = xmlNewInputStream(ctxt);
14259 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014260 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014261 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014262 return;
14263 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014264
Owen Taylor3473f882001-02-23 17:55:21 +000014265 xmlClearParserCtxt(ctxt);
14266 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014267 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014268 input->base = buffer;
14269 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014270 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014271 inputPush(ctxt, input);
14272}
14273
14274/**
14275 * xmlSAXUserParseFile:
14276 * @sax: a SAX handler
14277 * @user_data: The user data returned on SAX callbacks
14278 * @filename: a file name
14279 *
14280 * parse an XML file and call the given SAX handler routines.
14281 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014282 *
Owen Taylor3473f882001-02-23 17:55:21 +000014283 * Returns 0 in case of success or a error number otherwise
14284 */
14285int
14286xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14287 const char *filename) {
14288 int ret = 0;
14289 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014290
Owen Taylor3473f882001-02-23 17:55:21 +000014291 ctxt = xmlCreateFileParserCtxt(filename);
14292 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014293 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014294 xmlFree(ctxt->sax);
14295 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014296 xmlDetectSAX2(ctxt);
14297
Owen Taylor3473f882001-02-23 17:55:21 +000014298 if (user_data != NULL)
14299 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014300
Owen Taylor3473f882001-02-23 17:55:21 +000014301 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014302
Owen Taylor3473f882001-02-23 17:55:21 +000014303 if (ctxt->wellFormed)
14304 ret = 0;
14305 else {
14306 if (ctxt->errNo != 0)
14307 ret = ctxt->errNo;
14308 else
14309 ret = -1;
14310 }
14311 if (sax != NULL)
14312 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014313 if (ctxt->myDoc != NULL) {
14314 xmlFreeDoc(ctxt->myDoc);
14315 ctxt->myDoc = NULL;
14316 }
Owen Taylor3473f882001-02-23 17:55:21 +000014317 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014318
Owen Taylor3473f882001-02-23 17:55:21 +000014319 return ret;
14320}
Daniel Veillard81273902003-09-30 00:43:48 +000014321#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014322
14323/************************************************************************
14324 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014325 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014326 * *
14327 ************************************************************************/
14328
14329/**
14330 * xmlCreateMemoryParserCtxt:
14331 * @buffer: a pointer to a char array
14332 * @size: the size of the array
14333 *
14334 * Create a parser context for an XML in-memory document.
14335 *
14336 * Returns the new parser context or NULL
14337 */
14338xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014339xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014340 xmlParserCtxtPtr ctxt;
14341 xmlParserInputPtr input;
14342 xmlParserInputBufferPtr buf;
14343
14344 if (buffer == NULL)
14345 return(NULL);
14346 if (size <= 0)
14347 return(NULL);
14348
14349 ctxt = xmlNewParserCtxt();
14350 if (ctxt == NULL)
14351 return(NULL);
14352
Daniel Veillard53350552003-09-18 13:35:51 +000014353 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014354 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014355 if (buf == NULL) {
14356 xmlFreeParserCtxt(ctxt);
14357 return(NULL);
14358 }
Owen Taylor3473f882001-02-23 17:55:21 +000014359
14360 input = xmlNewInputStream(ctxt);
14361 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014362 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014363 xmlFreeParserCtxt(ctxt);
14364 return(NULL);
14365 }
14366
14367 input->filename = NULL;
14368 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014369 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014370
14371 inputPush(ctxt, input);
14372 return(ctxt);
14373}
14374
Daniel Veillard81273902003-09-30 00:43:48 +000014375#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014376/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014377 * xmlSAXParseMemoryWithData:
14378 * @sax: the SAX handler block
14379 * @buffer: an pointer to a char array
14380 * @size: the size of the array
14381 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14382 * documents
14383 * @data: the userdata
14384 *
14385 * parse an XML in-memory block and use the given SAX function block
14386 * to handle the parsing callback. If sax is NULL, fallback to the default
14387 * DOM tree building routines.
14388 *
14389 * User data (void *) is stored within the parser context in the
14390 * context's _private member, so it is available nearly everywhere in libxml
14391 *
14392 * Returns the resulting document tree
14393 */
14394
14395xmlDocPtr
14396xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14397 int size, int recovery, void *data) {
14398 xmlDocPtr ret;
14399 xmlParserCtxtPtr ctxt;
14400
Daniel Veillardab2a7632009-07-09 08:45:03 +020014401 xmlInitParser();
14402
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014403 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14404 if (ctxt == NULL) return(NULL);
14405 if (sax != NULL) {
14406 if (ctxt->sax != NULL)
14407 xmlFree(ctxt->sax);
14408 ctxt->sax = sax;
14409 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014410 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014411 if (data!=NULL) {
14412 ctxt->_private=data;
14413 }
14414
Daniel Veillardadba5f12003-04-04 16:09:01 +000014415 ctxt->recovery = recovery;
14416
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014417 xmlParseDocument(ctxt);
14418
14419 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14420 else {
14421 ret = NULL;
14422 xmlFreeDoc(ctxt->myDoc);
14423 ctxt->myDoc = NULL;
14424 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014425 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014426 ctxt->sax = NULL;
14427 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014428
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014429 return(ret);
14430}
14431
14432/**
Owen Taylor3473f882001-02-23 17:55:21 +000014433 * xmlSAXParseMemory:
14434 * @sax: the SAX handler block
14435 * @buffer: an pointer to a char array
14436 * @size: the size of the array
14437 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14438 * documents
14439 *
14440 * parse an XML in-memory block and use the given SAX function block
14441 * to handle the parsing callback. If sax is NULL, fallback to the default
14442 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014443 *
Owen Taylor3473f882001-02-23 17:55:21 +000014444 * Returns the resulting document tree
14445 */
14446xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014447xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14448 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014449 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014450}
14451
14452/**
14453 * xmlParseMemory:
14454 * @buffer: an pointer to a char array
14455 * @size: the size of the array
14456 *
14457 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014458 *
Owen Taylor3473f882001-02-23 17:55:21 +000014459 * Returns the resulting document tree
14460 */
14461
Daniel Veillard50822cb2001-07-26 20:05:51 +000014462xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014463 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14464}
14465
14466/**
14467 * xmlRecoverMemory:
14468 * @buffer: an pointer to a char array
14469 * @size: the size of the array
14470 *
14471 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014472 * In the case the document is not Well Formed, an attempt to
14473 * build a tree is tried anyway
14474 *
14475 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014476 */
14477
Daniel Veillard50822cb2001-07-26 20:05:51 +000014478xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014479 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14480}
14481
14482/**
14483 * xmlSAXUserParseMemory:
14484 * @sax: a SAX handler
14485 * @user_data: The user data returned on SAX callbacks
14486 * @buffer: an in-memory XML document input
14487 * @size: the length of the XML document in bytes
14488 *
14489 * A better SAX parsing routine.
14490 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014491 *
Owen Taylor3473f882001-02-23 17:55:21 +000014492 * Returns 0 in case of success or a error number otherwise
14493 */
14494int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014495 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014496 int ret = 0;
14497 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014498
14499 xmlInitParser();
14500
Owen Taylor3473f882001-02-23 17:55:21 +000014501 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14502 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014503 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14504 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014505 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014506 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014507
Daniel Veillard30211a02001-04-26 09:33:18 +000014508 if (user_data != NULL)
14509 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014510
Owen Taylor3473f882001-02-23 17:55:21 +000014511 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014512
Owen Taylor3473f882001-02-23 17:55:21 +000014513 if (ctxt->wellFormed)
14514 ret = 0;
14515 else {
14516 if (ctxt->errNo != 0)
14517 ret = ctxt->errNo;
14518 else
14519 ret = -1;
14520 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014521 if (sax != NULL)
14522 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014523 if (ctxt->myDoc != NULL) {
14524 xmlFreeDoc(ctxt->myDoc);
14525 ctxt->myDoc = NULL;
14526 }
Owen Taylor3473f882001-02-23 17:55:21 +000014527 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014528
Owen Taylor3473f882001-02-23 17:55:21 +000014529 return ret;
14530}
Daniel Veillard81273902003-09-30 00:43:48 +000014531#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014532
14533/**
14534 * xmlCreateDocParserCtxt:
14535 * @cur: a pointer to an array of xmlChar
14536 *
14537 * Creates a parser context for an XML in-memory document.
14538 *
14539 * Returns the new parser context or NULL
14540 */
14541xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014542xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014543 int len;
14544
14545 if (cur == NULL)
14546 return(NULL);
14547 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014548 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014549}
14550
Daniel Veillard81273902003-09-30 00:43:48 +000014551#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014552/**
14553 * xmlSAXParseDoc:
14554 * @sax: the SAX handler block
14555 * @cur: a pointer to an array of xmlChar
14556 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14557 * documents
14558 *
14559 * parse an XML in-memory document and build a tree.
14560 * It use the given SAX function block to handle the parsing callback.
14561 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014562 *
Owen Taylor3473f882001-02-23 17:55:21 +000014563 * Returns the resulting document tree
14564 */
14565
14566xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014567xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014568 xmlDocPtr ret;
14569 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014570 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014571
Daniel Veillard38936062004-11-04 17:45:11 +000014572 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014573
14574
14575 ctxt = xmlCreateDocParserCtxt(cur);
14576 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014577 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014578 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014579 ctxt->sax = sax;
14580 ctxt->userData = NULL;
14581 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014582 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014583
14584 xmlParseDocument(ctxt);
14585 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14586 else {
14587 ret = NULL;
14588 xmlFreeDoc(ctxt->myDoc);
14589 ctxt->myDoc = NULL;
14590 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014591 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014592 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014593 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014594
Owen Taylor3473f882001-02-23 17:55:21 +000014595 return(ret);
14596}
14597
14598/**
14599 * xmlParseDoc:
14600 * @cur: a pointer to an array of xmlChar
14601 *
14602 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014603 *
Owen Taylor3473f882001-02-23 17:55:21 +000014604 * Returns the resulting document tree
14605 */
14606
14607xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014608xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014609 return(xmlSAXParseDoc(NULL, cur, 0));
14610}
Daniel Veillard81273902003-09-30 00:43:48 +000014611#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014612
Daniel Veillard81273902003-09-30 00:43:48 +000014613#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014614/************************************************************************
14615 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014616 * Specific function to keep track of entities references *
14617 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014618 * *
14619 ************************************************************************/
14620
14621static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14622
14623/**
14624 * xmlAddEntityReference:
14625 * @ent : A valid entity
14626 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014627 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014628 *
14629 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14630 */
14631static void
14632xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14633 xmlNodePtr lastNode)
14634{
14635 if (xmlEntityRefFunc != NULL) {
14636 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14637 }
14638}
14639
14640
14641/**
14642 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014643 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014644 *
14645 * Set the function to call call back when a xml reference has been made
14646 */
14647void
14648xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14649{
14650 xmlEntityRefFunc = func;
14651}
Daniel Veillard81273902003-09-30 00:43:48 +000014652#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014653
14654/************************************************************************
14655 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014656 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014657 * *
14658 ************************************************************************/
14659
14660#ifdef LIBXML_XPATH_ENABLED
14661#include <libxml/xpath.h>
14662#endif
14663
Daniel Veillardffa3c742005-07-21 13:24:09 +000014664extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014665static int xmlParserInitialized = 0;
14666
14667/**
14668 * xmlInitParser:
14669 *
14670 * Initialization function for the XML parser.
14671 * This is not reentrant. Call once before processing in case of
14672 * use in multithreaded programs.
14673 */
14674
14675void
14676xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014677 if (xmlParserInitialized != 0)
14678 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014679
Haibo Huangcfd91dc2020-07-30 23:01:33 -070014680#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
Elliott Hughes60f5c162021-08-20 17:09:52 -070014681 if (xmlFree == free)
14682 atexit(xmlCleanupParser);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070014683#endif
14684
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014685#ifdef LIBXML_THREAD_ENABLED
14686 __xmlGlobalInitMutexLock();
14687 if (xmlParserInitialized == 0) {
14688#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014689 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014690 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014691 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14692 (xmlGenericError == NULL))
14693 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014694 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014695 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014696 xmlInitCharEncodingHandlers();
14697 xmlDefaultSAXHandlerInit();
14698 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014699#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014700 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014701#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014702#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014703 htmlInitAutoClose();
14704 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014705#endif
14706#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014707 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014708#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014709 xmlParserInitialized = 1;
14710#ifdef LIBXML_THREAD_ENABLED
14711 }
14712 __xmlGlobalInitMutexUnlock();
14713#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014714}
14715
14716/**
14717 * xmlCleanupParser:
14718 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014719 * This function name is somewhat misleading. It does not clean up
14720 * parser state, it cleans up memory allocated by the library itself.
14721 * It is a cleanup function for the XML library. It tries to reclaim all
14722 * related global memory allocated for the library processing.
14723 * It doesn't deallocate any document related memory. One should
14724 * call xmlCleanupParser() only when the process has finished using
14725 * the library and all XML/HTML documents built with it.
14726 * See also xmlInitParser() which has the opposite function of preparing
14727 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014728 *
14729 * WARNING: if your application is multithreaded or has plugin support
14730 * calling this may crash the application if another thread or
14731 * a plugin is still using libxml2. It's sometimes very hard to
14732 * guess if libxml2 is in use in the application, some libraries
14733 * or plugins may use it without notice. In case of doubt abstain
14734 * from calling this function or do it just before calling exit()
14735 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014736 */
14737
14738void
14739xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014740 if (!xmlParserInitialized)
14741 return;
14742
Owen Taylor3473f882001-02-23 17:55:21 +000014743 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014744#ifdef LIBXML_CATALOG_ENABLED
14745 xmlCatalogCleanup();
14746#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014747 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014748 xmlCleanupInputCallbacks();
14749#ifdef LIBXML_OUTPUT_ENABLED
14750 xmlCleanupOutputCallbacks();
14751#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014752#ifdef LIBXML_SCHEMAS_ENABLED
14753 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014754 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014755#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014756 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014757 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014758 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014759 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014760 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014761}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014762
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function tagged with ATTRIBUTE_DESTRUCTOR which runs the library
 * cleanup, provided the deallocator in use is the plain free().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14776
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014777/************************************************************************
14778 * *
14779 * New set (2.6.0) of simpler and more flexible APIs *
14780 * *
14781 ************************************************************************/
14782
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed. @str is evaluated more than once, so it must not
 * have side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as
 * a single statement (safe inside an unbraced if/else); the caller
 * supplies the terminating semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14794
14795/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014796 * xmlCtxtReset:
14797 * @ctxt: an XML parser context
14798 *
14799 * Reset a parser context
14800 */
14801void
14802xmlCtxtReset(xmlParserCtxtPtr ctxt)
14803{
14804 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014805 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014806
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014807 if (ctxt == NULL)
14808 return;
14809
14810 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014811
14812 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14813 xmlFreeInputStream(input);
14814 }
14815 ctxt->inputNr = 0;
14816 ctxt->input = NULL;
14817
14818 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014819 if (ctxt->spaceTab != NULL) {
14820 ctxt->spaceTab[0] = -1;
14821 ctxt->space = &ctxt->spaceTab[0];
14822 } else {
14823 ctxt->space = NULL;
14824 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014825
14826
14827 ctxt->nodeNr = 0;
14828 ctxt->node = NULL;
14829
14830 ctxt->nameNr = 0;
14831 ctxt->name = NULL;
14832
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014833 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014834 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014835 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014836 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014837 DICT_FREE(ctxt->directory);
14838 ctxt->directory = NULL;
14839 DICT_FREE(ctxt->extSubURI);
14840 ctxt->extSubURI = NULL;
14841 DICT_FREE(ctxt->extSubSystem);
14842 ctxt->extSubSystem = NULL;
14843 if (ctxt->myDoc != NULL)
14844 xmlFreeDoc(ctxt->myDoc);
14845 ctxt->myDoc = NULL;
14846
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014847 ctxt->standalone = -1;
14848 ctxt->hasExternalSubset = 0;
14849 ctxt->hasPErefs = 0;
14850 ctxt->html = 0;
14851 ctxt->external = 0;
14852 ctxt->instate = XML_PARSER_START;
14853 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014854
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014855 ctxt->wellFormed = 1;
14856 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014857 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014858 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014859#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014860 ctxt->vctxt.userData = ctxt;
14861 ctxt->vctxt.error = xmlParserValidityError;
14862 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014863#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014864 ctxt->record_info = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014865 ctxt->checkIndex = 0;
14866 ctxt->inSubset = 0;
14867 ctxt->errNo = XML_ERR_OK;
14868 ctxt->depth = 0;
14869 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14870 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014871 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014872 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014873 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014874 xmlInitNodeInfoSeq(&ctxt->node_seq);
14875
14876 if (ctxt->attsDefault != NULL) {
Nick Wellnhofere03f0a12017-11-09 16:42:47 +010014877 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014878 ctxt->attsDefault = NULL;
14879 }
14880 if (ctxt->attsSpecial != NULL) {
14881 xmlHashFree(ctxt->attsSpecial, NULL);
14882 ctxt->attsSpecial = NULL;
14883 }
14884
Daniel Veillard4432df22003-09-28 18:58:27 +000014885#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014886 if (ctxt->catalogs != NULL)
14887 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014888#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014889 if (ctxt->lastError.code != XML_ERR_OK)
14890 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014891}
14892
14893/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014894 * xmlCtxtResetPush:
14895 * @ctxt: an XML parser context
14896 * @chunk: a pointer to an array of chars
14897 * @size: number of chars in the array
14898 * @filename: an optional file name or URI
14899 * @encoding: the document encoding, or NULL
14900 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014901 * Reset a push parser context
14902 *
14903 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014904 */
14905int
14906xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14907 int size, const char *filename, const char *encoding)
14908{
14909 xmlParserInputPtr inputStream;
14910 xmlParserInputBufferPtr buf;
14911 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14912
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014913 if (ctxt == NULL)
14914 return(1);
14915
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014916 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14917 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14918
14919 buf = xmlAllocParserInputBuffer(enc);
14920 if (buf == NULL)
14921 return(1);
14922
14923 if (ctxt == NULL) {
14924 xmlFreeParserInputBuffer(buf);
14925 return(1);
14926 }
14927
14928 xmlCtxtReset(ctxt);
14929
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014930 if (filename == NULL) {
14931 ctxt->directory = NULL;
14932 } else {
14933 ctxt->directory = xmlParserGetDirectory(filename);
14934 }
14935
14936 inputStream = xmlNewInputStream(ctxt);
14937 if (inputStream == NULL) {
14938 xmlFreeParserInputBuffer(buf);
14939 return(1);
14940 }
14941
14942 if (filename == NULL)
14943 inputStream->filename = NULL;
14944 else
14945 inputStream->filename = (char *)
14946 xmlCanonicPath((const xmlChar *) filename);
14947 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014948 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014949
14950 inputPush(ctxt, inputStream);
14951
14952 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14953 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014954 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14955 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014956
14957 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14958
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014959 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014960#ifdef DEBUG_PUSH
14961 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14962#endif
14963 }
14964
14965 if (encoding != NULL) {
14966 xmlCharEncodingHandlerPtr hdlr;
14967
Daniel Veillard37334572008-07-31 08:20:02 +000014968 if (ctxt->encoding != NULL)
14969 xmlFree((xmlChar *) ctxt->encoding);
14970 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14971
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014972 hdlr = xmlFindCharEncodingHandler(encoding);
14973 if (hdlr != NULL) {
14974 xmlSwitchToEncoding(ctxt, hdlr);
14975 } else {
14976 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14977 "Unsupported encoding %s\n", BAD_CAST encoding);
14978 }
14979 } else if (enc != XML_CHAR_ENCODING_NONE) {
14980 xmlSwitchEncoding(ctxt, enc);
14981 }
14982
14983 return(0);
14984}
14985
Daniel Veillard37334572008-07-31 08:20:02 +000014986
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014987/**
Daniel Veillard37334572008-07-31 08:20:02 +000014988 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014989 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014990 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014991 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014992 *
14993 * Applies the options to the parser context
14994 *
14995 * Returns 0 in case of success, the set of unknown or unimplemented options
14996 * in case of error.
14997 */
Daniel Veillard37334572008-07-31 08:20:02 +000014998static int
14999xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015000{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015001 if (ctxt == NULL)
15002 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015003 if (encoding != NULL) {
15004 if (ctxt->encoding != NULL)
15005 xmlFree((xmlChar *) ctxt->encoding);
15006 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15007 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015008 if (options & XML_PARSE_RECOVER) {
15009 ctxt->recovery = 1;
15010 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015011 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015012 } else
15013 ctxt->recovery = 0;
15014 if (options & XML_PARSE_DTDLOAD) {
15015 ctxt->loadsubset = XML_DETECT_IDS;
15016 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015017 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015018 } else
15019 ctxt->loadsubset = 0;
15020 if (options & XML_PARSE_DTDATTR) {
15021 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15022 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015023 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015024 }
15025 if (options & XML_PARSE_NOENT) {
15026 ctxt->replaceEntities = 1;
15027 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15028 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015029 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015030 } else
15031 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015032 if (options & XML_PARSE_PEDANTIC) {
15033 ctxt->pedantic = 1;
15034 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015035 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015036 } else
15037 ctxt->pedantic = 0;
15038 if (options & XML_PARSE_NOBLANKS) {
15039 ctxt->keepBlanks = 0;
15040 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15041 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015042 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015043 } else
15044 ctxt->keepBlanks = 1;
15045 if (options & XML_PARSE_DTDVALID) {
15046 ctxt->validate = 1;
15047 if (options & XML_PARSE_NOWARNING)
15048 ctxt->vctxt.warning = NULL;
15049 if (options & XML_PARSE_NOERROR)
15050 ctxt->vctxt.error = NULL;
15051 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015052 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015053 } else
15054 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015055 if (options & XML_PARSE_NOWARNING) {
15056 ctxt->sax->warning = NULL;
15057 options -= XML_PARSE_NOWARNING;
15058 }
15059 if (options & XML_PARSE_NOERROR) {
15060 ctxt->sax->error = NULL;
15061 ctxt->sax->fatalError = NULL;
15062 options -= XML_PARSE_NOERROR;
15063 }
Daniel Veillard81273902003-09-30 00:43:48 +000015064#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015065 if (options & XML_PARSE_SAX1) {
15066 ctxt->sax->startElement = xmlSAX2StartElement;
15067 ctxt->sax->endElement = xmlSAX2EndElement;
15068 ctxt->sax->startElementNs = NULL;
15069 ctxt->sax->endElementNs = NULL;
15070 ctxt->sax->initialized = 1;
15071 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015072 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015073 }
Daniel Veillard81273902003-09-30 00:43:48 +000015074#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015075 if (options & XML_PARSE_NODICT) {
15076 ctxt->dictNames = 0;
15077 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015078 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015079 } else {
15080 ctxt->dictNames = 1;
15081 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015082 if (options & XML_PARSE_NOCDATA) {
15083 ctxt->sax->cdataBlock = NULL;
15084 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015085 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015086 }
15087 if (options & XML_PARSE_NSCLEAN) {
15088 ctxt->options |= XML_PARSE_NSCLEAN;
15089 options -= XML_PARSE_NSCLEAN;
15090 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015091 if (options & XML_PARSE_NONET) {
15092 ctxt->options |= XML_PARSE_NONET;
15093 options -= XML_PARSE_NONET;
15094 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015095 if (options & XML_PARSE_COMPACT) {
15096 ctxt->options |= XML_PARSE_COMPACT;
15097 options -= XML_PARSE_COMPACT;
15098 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015099 if (options & XML_PARSE_OLD10) {
15100 ctxt->options |= XML_PARSE_OLD10;
15101 options -= XML_PARSE_OLD10;
15102 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015103 if (options & XML_PARSE_NOBASEFIX) {
15104 ctxt->options |= XML_PARSE_NOBASEFIX;
15105 options -= XML_PARSE_NOBASEFIX;
15106 }
15107 if (options & XML_PARSE_HUGE) {
15108 ctxt->options |= XML_PARSE_HUGE;
15109 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015110 if (ctxt->dict != NULL)
15111 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015112 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015113 if (options & XML_PARSE_OLDSAX) {
15114 ctxt->options |= XML_PARSE_OLDSAX;
15115 options -= XML_PARSE_OLDSAX;
15116 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015117 if (options & XML_PARSE_IGNORE_ENC) {
15118 ctxt->options |= XML_PARSE_IGNORE_ENC;
15119 options -= XML_PARSE_IGNORE_ENC;
15120 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015121 if (options & XML_PARSE_BIG_LINES) {
15122 ctxt->options |= XML_PARSE_BIG_LINES;
15123 options -= XML_PARSE_BIG_LINES;
15124 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015125 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015126 return (options);
15127}
15128
15129/**
Daniel Veillard37334572008-07-31 08:20:02 +000015130 * xmlCtxtUseOptions:
15131 * @ctxt: an XML parser context
15132 * @options: a combination of xmlParserOption
15133 *
15134 * Applies the options to the parser context
15135 *
15136 * Returns 0 in case of success, the set of unknown or unimplemented options
15137 * in case of error.
15138 */
15139int
15140xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15141{
15142 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15143}
15144
15145/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015146 * xmlDoRead:
15147 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015148 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015149 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015150 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015151 * @reuse: keep the context for reuse
15152 *
15153 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015154 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015155 * Returns the resulting document tree or NULL
15156 */
15157static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015158xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15159 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015160{
15161 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015162
15163 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015164 if (encoding != NULL) {
15165 xmlCharEncodingHandlerPtr hdlr;
15166
15167 hdlr = xmlFindCharEncodingHandler(encoding);
15168 if (hdlr != NULL)
15169 xmlSwitchToEncoding(ctxt, hdlr);
15170 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015171 if ((URL != NULL) && (ctxt->input != NULL) &&
15172 (ctxt->input->filename == NULL))
15173 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015174 xmlParseDocument(ctxt);
15175 if ((ctxt->wellFormed) || ctxt->recovery)
15176 ret = ctxt->myDoc;
15177 else {
15178 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015179 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015180 xmlFreeDoc(ctxt->myDoc);
15181 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015182 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015183 ctxt->myDoc = NULL;
15184 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015185 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015186 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015187
15188 return (ret);
15189}
15190
15191/**
15192 * xmlReadDoc:
15193 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015194 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015195 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015196 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015197 *
15198 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015199 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015200 * Returns the resulting document tree
15201 */
15202xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015203xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015204{
15205 xmlParserCtxtPtr ctxt;
15206
15207 if (cur == NULL)
15208 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015209 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015210
15211 ctxt = xmlCreateDocParserCtxt(cur);
15212 if (ctxt == NULL)
15213 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015214 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015215}
15216
15217/**
15218 * xmlReadFile:
15219 * @filename: a file or URL
15220 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015221 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015222 *
15223 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015224 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015225 * Returns the resulting document tree
15226 */
15227xmlDocPtr
15228xmlReadFile(const char *filename, const char *encoding, int options)
15229{
15230 xmlParserCtxtPtr ctxt;
15231
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015232 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015233 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015234 if (ctxt == NULL)
15235 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015236 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015237}
15238
15239/**
15240 * xmlReadMemory:
15241 * @buffer: a pointer to a char array
15242 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015243 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015244 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015245 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015246 *
15247 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015248 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015249 * Returns the resulting document tree
15250 */
15251xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015252xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015253{
15254 xmlParserCtxtPtr ctxt;
15255
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015256 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015257 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15258 if (ctxt == NULL)
15259 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015260 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015261}
15262
15263/**
15264 * xmlReadFd:
15265 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015266 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015267 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015268 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015269 *
15270 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015271 * NOTE that the file descriptor will not be closed when the
15272 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015273 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015274 * Returns the resulting document tree
15275 */
15276xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015277xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015278{
15279 xmlParserCtxtPtr ctxt;
15280 xmlParserInputBufferPtr input;
15281 xmlParserInputPtr stream;
15282
15283 if (fd < 0)
15284 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015285 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015286
15287 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15288 if (input == NULL)
15289 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015290 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015291 ctxt = xmlNewParserCtxt();
15292 if (ctxt == NULL) {
15293 xmlFreeParserInputBuffer(input);
15294 return (NULL);
15295 }
15296 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15297 if (stream == NULL) {
15298 xmlFreeParserInputBuffer(input);
15299 xmlFreeParserCtxt(ctxt);
15300 return (NULL);
15301 }
15302 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015303 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015304}
15305
15306/**
15307 * xmlReadIO:
15308 * @ioread: an I/O read function
15309 * @ioclose: an I/O close function
15310 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015311 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015312 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015313 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015314 *
15315 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015316 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015317 * Returns the resulting document tree
15318 */
15319xmlDocPtr
15320xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015321 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015322{
15323 xmlParserCtxtPtr ctxt;
15324 xmlParserInputBufferPtr input;
15325 xmlParserInputPtr stream;
15326
15327 if (ioread == NULL)
15328 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015329 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015330
15331 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15332 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015333 if (input == NULL) {
15334 if (ioclose != NULL)
15335 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015336 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015337 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015338 ctxt = xmlNewParserCtxt();
15339 if (ctxt == NULL) {
15340 xmlFreeParserInputBuffer(input);
15341 return (NULL);
15342 }
15343 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15344 if (stream == NULL) {
15345 xmlFreeParserInputBuffer(input);
15346 xmlFreeParserCtxt(ctxt);
15347 return (NULL);
15348 }
15349 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015350 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015351}
15352
15353/**
15354 * xmlCtxtReadDoc:
15355 * @ctxt: an XML parser context
15356 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015357 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015358 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015359 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015360 *
15361 * parse an XML in-memory document and build a tree.
15362 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015363 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015364 * Returns the resulting document tree
15365 */
15366xmlDocPtr
15367xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015368 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015369{
15370 xmlParserInputPtr stream;
15371
15372 if (cur == NULL)
15373 return (NULL);
15374 if (ctxt == NULL)
15375 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015376 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015377
15378 xmlCtxtReset(ctxt);
15379
15380 stream = xmlNewStringInputStream(ctxt, cur);
15381 if (stream == NULL) {
15382 return (NULL);
15383 }
15384 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015385 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015386}
15387
15388/**
15389 * xmlCtxtReadFile:
15390 * @ctxt: an XML parser context
15391 * @filename: a file or URL
15392 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015393 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015394 *
15395 * parse an XML file from the filesystem or the network.
15396 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015397 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015398 * Returns the resulting document tree
15399 */
15400xmlDocPtr
15401xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15402 const char *encoding, int options)
15403{
15404 xmlParserInputPtr stream;
15405
15406 if (filename == NULL)
15407 return (NULL);
15408 if (ctxt == NULL)
15409 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015410 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015411
15412 xmlCtxtReset(ctxt);
15413
Daniel Veillard29614c72004-11-26 10:47:26 +000015414 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015415 if (stream == NULL) {
15416 return (NULL);
15417 }
15418 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015419 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015420}
15421
15422/**
15423 * xmlCtxtReadMemory:
15424 * @ctxt: an XML parser context
15425 * @buffer: a pointer to a char array
15426 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015427 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015428 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015429 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015430 *
15431 * parse an XML in-memory document and build a tree.
15432 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015433 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015434 * Returns the resulting document tree
15435 */
15436xmlDocPtr
15437xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015438 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015439{
15440 xmlParserInputBufferPtr input;
15441 xmlParserInputPtr stream;
15442
15443 if (ctxt == NULL)
15444 return (NULL);
15445 if (buffer == NULL)
15446 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015447 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015448
15449 xmlCtxtReset(ctxt);
15450
15451 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15452 if (input == NULL) {
15453 return(NULL);
15454 }
15455
15456 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15457 if (stream == NULL) {
15458 xmlFreeParserInputBuffer(input);
15459 return(NULL);
15460 }
15461
15462 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015463 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015464}
15465
15466/**
15467 * xmlCtxtReadFd:
15468 * @ctxt: an XML parser context
15469 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015470 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015471 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015472 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015473 *
15474 * parse an XML from a file descriptor and build a tree.
15475 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015476 * NOTE that the file descriptor will not be closed when the
15477 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015478 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015479 * Returns the resulting document tree
15480 */
15481xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015482xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15483 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015484{
15485 xmlParserInputBufferPtr input;
15486 xmlParserInputPtr stream;
15487
15488 if (fd < 0)
15489 return (NULL);
15490 if (ctxt == NULL)
15491 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015492 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015493
15494 xmlCtxtReset(ctxt);
15495
15496
15497 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15498 if (input == NULL)
15499 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015500 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015501 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15502 if (stream == NULL) {
15503 xmlFreeParserInputBuffer(input);
15504 return (NULL);
15505 }
15506 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015507 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015508}
15509
15510/**
15511 * xmlCtxtReadIO:
15512 * @ctxt: an XML parser context
15513 * @ioread: an I/O read function
15514 * @ioclose: an I/O close function
15515 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015516 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015517 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015518 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015519 *
15520 * parse an XML document from I/O functions and source and build a tree.
15521 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015522 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015523 * Returns the resulting document tree
15524 */
15525xmlDocPtr
15526xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15527 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015528 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015529 const char *encoding, int options)
15530{
15531 xmlParserInputBufferPtr input;
15532 xmlParserInputPtr stream;
15533
15534 if (ioread == NULL)
15535 return (NULL);
15536 if (ctxt == NULL)
15537 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015538 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015539
15540 xmlCtxtReset(ctxt);
15541
15542 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15543 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015544 if (input == NULL) {
15545 if (ioclose != NULL)
15546 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015547 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015548 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015549 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15550 if (stream == NULL) {
15551 xmlFreeParserInputBuffer(input);
15552 return (NULL);
15553 }
15554 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015555 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015556}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015557