blob: e1d139d5dab7d451af3f69b26ed6f0be8df1cd81 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Stéphane Michaut454e3972017-08-28 14:30:43 +020033/* To avoid EBCDIC trouble when parsing on zOS */
34#if defined(__MVS__)
35#pragma convert("ISO8859-1")
36#endif
37
Daniel Veillard34ce8be2002-03-18 19:37:11 +000038#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000039#include "libxml.h"
40
Nick Wellnhofere3890542017-10-09 00:20:01 +020041#if defined(_WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000042#define XML_DIR_SEP '\\'
43#else
Owen Taylor3473f882001-02-23 17:55:21 +000044#define XML_DIR_SEP '/'
45#endif
46
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080048#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000050#include <stdarg.h>
Nick Wellnhofer855c19e2017-06-01 01:04:08 +020051#include <stddef.h>
Owen Taylor3473f882001-02-23 17:55:21 +000052#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000053#include <libxml/threads.h>
54#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055#include <libxml/tree.h>
56#include <libxml/parser.h>
57#include <libxml/parserInternals.h>
58#include <libxml/valid.h>
59#include <libxml/entities.h>
60#include <libxml/xmlerror.h>
61#include <libxml/encoding.h>
62#include <libxml/xmlIO.h>
63#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000064#ifdef LIBXML_CATALOG_ENABLED
65#include <libxml/catalog.h>
66#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000067#ifdef LIBXML_SCHEMAS_ENABLED
68#include <libxml/xmlschemastypes.h>
69#include <libxml/relaxng.h>
70#endif
Owen Taylor3473f882001-02-23 17:55:21 +000071#ifdef HAVE_CTYPE_H
72#include <ctype.h>
73#endif
74#ifdef HAVE_STDLIB_H
75#include <stdlib.h>
76#endif
77#ifdef HAVE_SYS_STAT_H
78#include <sys/stat.h>
79#endif
80#ifdef HAVE_FCNTL_H
81#include <fcntl.h>
82#endif
83#ifdef HAVE_UNISTD_H
84#include <unistd.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080097static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98
Haibo Huangcfd91dc2020-07-30 23:01:33 -070099static int
100xmlParseElementStart(xmlParserCtxtPtr ctxt);
101
102static void
103xmlParseElementEnd(xmlParserCtxtPtr ctxt);
104
Daniel Veillard0161e632008-08-28 15:36:32 +0000105/************************************************************************
106 * *
107 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
108 * *
109 ************************************************************************/
110
/*
 * XML_PARSER_BIG_ENTITY: replacement sizes below this value are never
 * flagged by the size based check in xmlParserEntityCheck().
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold at which the ratio of parsed
 * entity replacement to the size in bytes of the input indicates an
 * exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
121
122/*
123 * xmlParserEntityCheck
124 *
125 * Function to check non-linear entity expansion behaviour
126 * This is here to detect and stop exponential linear entity expansion
127 * This is not a limitation of the parser but a safety
128 * boundary feature. It can be disabled with the XML_PARSE_HUGE
129 * parser option.
130 */
131static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800132xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800133 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000134{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800135 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000136
137 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
138 return (0);
139 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
140 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800141
142 /*
143 * This may look absurd but is needed to detect
144 * entities problems
145 */
146 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillardbdd66182016-05-23 12:27:58 +0800147 (ent->content != NULL) && (ent->checked == 0) &&
148 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -0700149 unsigned long oldnbent = ctxt->nbentities, diff;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800150 xmlChar *rep;
151
152 ent->checked = 1;
153
Peter Simons8f30bdf2016-04-15 11:56:55 +0200154 ++ctxt->depth;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800155 rep = xmlStringDecodeEntities(ctxt, ent->content,
156 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +0200157 --ctxt->depth;
Nick Wellnhofer707ad082018-01-23 16:37:54 +0100158 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbdd66182016-05-23 12:27:58 +0800159 ent->content[0] = 0;
160 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800161
Haibo Huangcfd91dc2020-07-30 23:01:33 -0700162 diff = ctxt->nbentities - oldnbent + 1;
163 if (diff > INT_MAX / 2)
164 diff = INT_MAX / 2;
165 ent->checked = diff * 2;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800166 if (rep != NULL) {
167 if (xmlStrchr(rep, '<'))
168 ent->checked |= 1;
169 xmlFree(rep);
170 rep = NULL;
171 }
172 }
Daniel Veillard23f05e02013-02-19 10:21:49 +0800173 if (replacement != 0) {
174 if (replacement < XML_MAX_TEXT_LENGTH)
175 return(0);
176
177 /*
178 * If the volume of entity copy reaches 10 times the
179 * amount of parsed data and over the large text threshold
180 * then that's very likely to be an abuse.
181 */
182 if (ctxt->input != NULL) {
183 consumed = ctxt->input->consumed +
184 (ctxt->input->cur - ctxt->input->base);
185 }
186 consumed += ctxt->sizeentities;
187
188 if (replacement < XML_PARSER_NON_LINEAR * consumed)
189 return(0);
190 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000191 /*
192 * Do the check based on the replacement size of the entity
193 */
194 if (size < XML_PARSER_BIG_ENTITY)
195 return(0);
196
197 /*
198 * A limit on the amount of text data reasonably used
199 */
200 if (ctxt->input != NULL) {
201 consumed = ctxt->input->consumed +
202 (ctxt->input->cur - ctxt->input->base);
203 }
204 consumed += ctxt->sizeentities;
205
206 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
207 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
208 return (0);
209 } else if (ent != NULL) {
210 /*
211 * use the number of parsed entities in the replacement
212 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800213 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000214
215 /*
216 * The amount of data parsed counting entities size only once
217 */
218 if (ctxt->input != NULL) {
219 consumed = ctxt->input->consumed +
220 (ctxt->input->cur - ctxt->input->base);
221 }
222 consumed += ctxt->sizeentities;
223
224 /*
225 * Check the density of entities for the amount of data
226 * knowing an entity reference will take at least 3 bytes
227 */
228 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
229 return (0);
230 } else {
231 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800232 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000233 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800234 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
235 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
236 (ctxt->nbentities <= 10000))
237 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000238 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000239 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
240 return (1);
241}
242
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents that we accept to
 * process. This is a safety boundary feature and not a limitation of
 * the parser itself. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000252
Daniel Veillard0fb18932003-09-07 09:14:37 +0000253
Daniel Veillard0161e632008-08-28 15:36:32 +0000254
#define SAX2 1

/* Buffer sizes used by the parser */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the available input characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
270
/*
 * NULL terminated list of the processing instruction targets starting
 * with "xml" which are allowed by W3C specs.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
280
Daniel Veillarda07050d2003-10-19 14:46:32 +0000281
Owen Taylor3473f882001-02-23 17:55:21 +0000282/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200283static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
284 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000285
Daniel Veillard7d515752003-09-26 19:12:37 +0000286static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000287xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
288 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000289 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000290 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000291
Daniel Veillard37334572008-07-31 08:20:02 +0000292static int
293xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
294 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000295#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000296static void
297xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
298 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000299#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000300
Daniel Veillard7d515752003-09-26 19:12:37 +0000301static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000302xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
303 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000304
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000305static int
306xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
307
Daniel Veillarde57ec792003-09-10 10:50:59 +0000308/************************************************************************
309 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800310 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000311 * *
312 ************************************************************************/
313
314/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 * xmlErrAttributeDup:
316 * @ctxt: an XML parser context
317 * @prefix: the attribute prefix
318 * @localname: the attribute localname
319 *
320 * Handle a redefinition of attribute error
321 */
322static void
323xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
324 const xmlChar * localname)
325{
Daniel Veillard157fee02003-10-31 10:36:03 +0000326 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
327 (ctxt->instate == XML_PARSER_EOF))
328 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000329 if (ctxt != NULL)
330 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200331
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000332 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000333 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200334 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 (const char *) localname, NULL, NULL, 0, 0,
336 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000337 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000338 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200339 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 (const char *) prefix, (const char *) localname,
341 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
342 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000343 if (ctxt != NULL) {
344 ctxt->wellFormed = 0;
345 if (ctxt->recovery == 0)
346 ctxt->disableSAX = 1;
347 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348}
349
350/**
351 * xmlFatalErr:
352 * @ctxt: an XML parser context
353 * @error: the error number
354 * @extra: extra information string
355 *
356 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
357 */
358static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360{
361 const char *errmsg;
362
Daniel Veillard157fee02003-10-31 10:36:03 +0000363 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
364 (ctxt->instate == XML_PARSER_EOF))
365 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 switch (error) {
367 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800368 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800374 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377 errmsg = "internal error";
378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800404 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800428 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800431 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800434 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800437 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800440 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800443 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 errmsg = "Fragment not allowed";
447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800449 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000451 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800452 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800455 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000457 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800458 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000460 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800461 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000462 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000463 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800464 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000466 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800467 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000468 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000469 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800471 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000473 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800474 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000475 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000476 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800477 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000478 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000479 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800480 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000481 break;
482 case XML_ERR_CONDSEC_INVALID_KEYWORD:
483 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000492 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000495 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800499 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000500 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000501 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800502 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000504 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800505 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000506 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000507 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800508 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000509 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000510 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800511 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000512 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000513 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800514 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000515 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000516 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800517 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000518 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000519 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800520 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000521 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000522 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800523 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000525 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800526 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000527 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000528 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800529 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000530 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000531 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800532 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000533 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000534 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800535 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000536 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800537 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800538 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800539 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000540#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000541 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800542 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000543 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000544#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000545 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800546 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000547 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000548 if (ctxt != NULL)
549 ctxt->errNo = error;
David Kilzer4472c3a2016-05-13 15:13:17 +0800550 if (info == NULL) {
551 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
552 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
553 errmsg);
554 } else {
555 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
556 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
557 errmsg, info);
558 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000559 if (ctxt != NULL) {
560 ctxt->wellFormed = 0;
561 if (ctxt->recovery == 0)
562 ctxt->disableSAX = 1;
563 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000564}
565
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000566/**
567 * xmlFatalErrMsg:
568 * @ctxt: an XML parser context
569 * @error: the error number
570 * @msg: the error message
571 *
572 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
573 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800574static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000575xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
576 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000577{
Daniel Veillard157fee02003-10-31 10:36:03 +0000578 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
579 (ctxt->instate == XML_PARSER_EOF))
580 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000581 if (ctxt != NULL)
582 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000583 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200584 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000585 if (ctxt != NULL) {
586 ctxt->wellFormed = 0;
587 if (ctxt->recovery == 0)
588 ctxt->disableSAX = 1;
589 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000590}
591
592/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000593 * xmlWarningMsg:
594 * @ctxt: an XML parser context
595 * @error: the error number
596 * @msg: the error message
597 * @str1: extra data
598 * @str2: extra data
599 *
600 * Handle a warning.
601 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800602static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000603xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
604 const char *msg, const xmlChar *str1, const xmlChar *str2)
605{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000606 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000607
Daniel Veillard157fee02003-10-31 10:36:03 +0000608 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
609 (ctxt->instate == XML_PARSER_EOF))
610 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000611 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
612 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000613 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200614 if (ctxt != NULL) {
615 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000616 (ctxt->sax) ? ctxt->sax->warning : NULL,
617 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000618 ctxt, NULL, XML_FROM_PARSER, error,
619 XML_ERR_WARNING, NULL, 0,
620 (const char *) str1, (const char *) str2, NULL, 0, 0,
621 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200622 } else {
623 __xmlRaiseError(schannel, NULL, NULL,
624 ctxt, NULL, XML_FROM_PARSER, error,
625 XML_ERR_WARNING, NULL, 0,
626 (const char *) str1, (const char *) str2, NULL, 0, 0,
627 msg, (const char *) str1, (const char *) str2);
628 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000629}
630
631/**
632 * xmlValidityError:
633 * @ctxt: an XML parser context
634 * @error: the error number
635 * @msg: the error message
636 * @str1: extra data
637 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000638 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000639 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800640static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000641xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000642 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000643{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000644 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000645
646 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
647 (ctxt->instate == XML_PARSER_EOF))
648 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000649 if (ctxt != NULL) {
650 ctxt->errNo = error;
651 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
652 schannel = ctxt->sax->serror;
653 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200654 if (ctxt != NULL) {
655 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000656 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000657 ctxt, NULL, XML_FROM_DTD, error,
658 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000659 (const char *) str2, NULL, 0, 0,
660 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000661 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200662 } else {
663 __xmlRaiseError(schannel, NULL, NULL,
664 ctxt, NULL, XML_FROM_DTD, error,
665 XML_ERR_ERROR, NULL, 0, (const char *) str1,
666 (const char *) str2, NULL, 0, 0,
667 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000668 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000669}
670
671/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000672 * xmlFatalErrMsgInt:
673 * @ctxt: an XML parser context
674 * @error: the error number
675 * @msg: the error message
676 * @val: an integer value
677 *
678 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
679 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800680static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000681xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000682 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000683{
Daniel Veillard157fee02003-10-31 10:36:03 +0000684 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
685 (ctxt->instate == XML_PARSER_EOF))
686 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000687 if (ctxt != NULL)
688 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000689 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000690 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
691 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000692 if (ctxt != NULL) {
693 ctxt->wellFormed = 0;
694 if (ctxt->recovery == 0)
695 ctxt->disableSAX = 1;
696 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000697}
698
699/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000700 * xmlFatalErrMsgStrIntStr:
701 * @ctxt: an XML parser context
702 * @error: the error number
703 * @msg: the error message
704 * @str1: an string info
705 * @val: an integer value
706 * @str2: an string info
707 *
708 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
709 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800710static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000711xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800712 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000713 const xmlChar *str2)
714{
Daniel Veillard157fee02003-10-31 10:36:03 +0000715 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
716 (ctxt->instate == XML_PARSER_EOF))
717 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000718 if (ctxt != NULL)
719 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000720 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000721 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
722 NULL, 0, (const char *) str1, (const char *) str2,
723 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000724 if (ctxt != NULL) {
725 ctxt->wellFormed = 0;
726 if (ctxt->recovery == 0)
727 ctxt->disableSAX = 1;
728 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000729}
730
731/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000732 * xmlFatalErrMsgStr:
733 * @ctxt: an XML parser context
734 * @error: the error number
735 * @msg: the error message
736 * @val: a string value
737 *
738 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
739 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800740static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000741xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000742 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000743{
Daniel Veillard157fee02003-10-31 10:36:03 +0000744 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
745 (ctxt->instate == XML_PARSER_EOF))
746 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000747 if (ctxt != NULL)
748 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000749 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000750 XML_FROM_PARSER, error, XML_ERR_FATAL,
751 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
752 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000753 if (ctxt != NULL) {
754 ctxt->wellFormed = 0;
755 if (ctxt->recovery == 0)
756 ctxt->disableSAX = 1;
757 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000758}
759
760/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000761 * xmlErrMsgStr:
762 * @ctxt: an XML parser context
763 * @error: the error number
764 * @msg: the error message
765 * @val: a string value
766 *
767 * Handle a non fatal parser error
768 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800769static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000770xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
771 const char *msg, const xmlChar * val)
772{
Daniel Veillard157fee02003-10-31 10:36:03 +0000773 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
774 (ctxt->instate == XML_PARSER_EOF))
775 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000776 if (ctxt != NULL)
777 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000778 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000779 XML_FROM_PARSER, error, XML_ERR_ERROR,
780 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
781 val);
782}
783
784/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000785 * xmlNsErr:
786 * @ctxt: an XML parser context
787 * @error: the error number
788 * @msg: the message
789 * @info1: extra information string
790 * @info2: extra information string
791 *
792 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
793 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800794static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000795xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
796 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000797 const xmlChar * info1, const xmlChar * info2,
798 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000799{
Daniel Veillard157fee02003-10-31 10:36:03 +0000800 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
801 (ctxt->instate == XML_PARSER_EOF))
802 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000803 if (ctxt != NULL)
804 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000805 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000806 XML_ERR_ERROR, NULL, 0, (const char *) info1,
807 (const char *) info2, (const char *) info3, 0, 0, msg,
808 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000809 if (ctxt != NULL)
810 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000811}
812
Daniel Veillard37334572008-07-31 08:20:02 +0000813/**
814 * xmlNsWarn
815 * @ctxt: an XML parser context
816 * @error: the error number
817 * @msg: the message
818 * @info1: extra information string
819 * @info2: extra information string
820 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800821 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000822 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800823static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard37334572008-07-31 08:20:02 +0000824xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
825 const char *msg,
826 const xmlChar * info1, const xmlChar * info2,
827 const xmlChar * info3)
828{
829 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
830 (ctxt->instate == XML_PARSER_EOF))
831 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000832 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
833 XML_ERR_WARNING, NULL, 0, (const char *) info1,
834 (const char *) info2, (const char *) info3, 0, 0, msg,
835 info1, info2, info3);
836}
837
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000838/************************************************************************
839 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800840 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000841 * *
842 ************************************************************************/
843
844/**
845 * xmlHasFeature:
846 * @feature: the feature to be examined
847 *
848 * Examines if the library has been compiled with a given feature.
849 *
850 * Returns a non-zero value if the feature exist, otherwise zero.
851 * Returns zero (0) if the feature does not exist or an unknown
852 * unknown feature is requested, non-zero otherwise.
853 */
854int
855xmlHasFeature(xmlFeature feature)
856{
857 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000858 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000859#ifdef LIBXML_THREAD_ENABLED
860 return(1);
861#else
862 return(0);
863#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000864 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000865#ifdef LIBXML_TREE_ENABLED
866 return(1);
867#else
868 return(0);
869#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000870 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000871#ifdef LIBXML_OUTPUT_ENABLED
872 return(1);
873#else
874 return(0);
875#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000876 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000877#ifdef LIBXML_PUSH_ENABLED
878 return(1);
879#else
880 return(0);
881#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000882 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000883#ifdef LIBXML_READER_ENABLED
884 return(1);
885#else
886 return(0);
887#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000888 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000889#ifdef LIBXML_PATTERN_ENABLED
890 return(1);
891#else
892 return(0);
893#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000894 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000895#ifdef LIBXML_WRITER_ENABLED
896 return(1);
897#else
898 return(0);
899#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000900 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000901#ifdef LIBXML_SAX1_ENABLED
902 return(1);
903#else
904 return(0);
905#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000906 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000907#ifdef LIBXML_FTP_ENABLED
908 return(1);
909#else
910 return(0);
911#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000912 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000913#ifdef LIBXML_HTTP_ENABLED
914 return(1);
915#else
916 return(0);
917#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000918 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000919#ifdef LIBXML_VALID_ENABLED
920 return(1);
921#else
922 return(0);
923#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000924 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000925#ifdef LIBXML_HTML_ENABLED
926 return(1);
927#else
928 return(0);
929#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000930 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000931#ifdef LIBXML_LEGACY_ENABLED
932 return(1);
933#else
934 return(0);
935#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000936 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000937#ifdef LIBXML_C14N_ENABLED
938 return(1);
939#else
940 return(0);
941#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000942 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000943#ifdef LIBXML_CATALOG_ENABLED
944 return(1);
945#else
946 return(0);
947#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000948 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000949#ifdef LIBXML_XPATH_ENABLED
950 return(1);
951#else
952 return(0);
953#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000954 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000955#ifdef LIBXML_XPTR_ENABLED
956 return(1);
957#else
958 return(0);
959#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000960 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000961#ifdef LIBXML_XINCLUDE_ENABLED
962 return(1);
963#else
964 return(0);
965#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000966 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000967#ifdef LIBXML_ICONV_ENABLED
968 return(1);
969#else
970 return(0);
971#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000972 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000973#ifdef LIBXML_ISO8859X_ENABLED
974 return(1);
975#else
976 return(0);
977#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000978 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000979#ifdef LIBXML_UNICODE_ENABLED
980 return(1);
981#else
982 return(0);
983#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000984 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000985#ifdef LIBXML_REGEXP_ENABLED
986 return(1);
987#else
988 return(0);
989#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000990 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000991#ifdef LIBXML_AUTOMATA_ENABLED
992 return(1);
993#else
994 return(0);
995#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000996 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000997#ifdef LIBXML_EXPR_ENABLED
998 return(1);
999#else
1000 return(0);
1001#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001002 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001003#ifdef LIBXML_SCHEMAS_ENABLED
1004 return(1);
1005#else
1006 return(0);
1007#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001008 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001009#ifdef LIBXML_SCHEMATRON_ENABLED
1010 return(1);
1011#else
1012 return(0);
1013#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001014 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001015#ifdef LIBXML_MODULES_ENABLED
1016 return(1);
1017#else
1018 return(0);
1019#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001020 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001021#ifdef LIBXML_DEBUG_ENABLED
1022 return(1);
1023#else
1024 return(0);
1025#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001026 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001027#ifdef DEBUG_MEMORY_LOCATION
1028 return(1);
1029#else
1030 return(0);
1031#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001032 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001033#ifdef LIBXML_DEBUG_RUNTIME
1034 return(1);
1035#else
1036 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001037#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001038 case XML_WITH_ZLIB:
1039#ifdef LIBXML_ZLIB_ENABLED
1040 return(1);
1041#else
1042 return(0);
1043#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001044 case XML_WITH_LZMA:
1045#ifdef LIBXML_LZMA_ENABLED
1046 return(1);
1047#else
1048 return(0);
1049#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001050 case XML_WITH_ICU:
1051#ifdef LIBXML_ICU_ENABLED
1052 return(1);
1053#else
1054 return(0);
1055#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001056 default:
1057 break;
1058 }
1059 return(0);
1060}
1061
1062/************************************************************************
1063 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001064 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001065 * *
1066 ************************************************************************/
1067
1068/**
1069 * xmlDetectSAX2:
1070 * @ctxt: an XML parser context
1071 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001072 * Do the SAX2 detection and specific initialization
Daniel Veillarde57ec792003-09-10 10:50:59 +00001073 */
1074static void
1075xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1076 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001077#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001078 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1079 ((ctxt->sax->startElementNs != NULL) ||
1080 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001081#else
1082 ctxt->sax2 = 1;
1083#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001084
1085 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1086 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1087 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001088 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1089 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001090 xmlErrMemory(ctxt, NULL);
1091 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001092}
1093
Daniel Veillarde57ec792003-09-10 10:50:59 +00001094typedef struct _xmlDefAttrs xmlDefAttrs;
1095typedef xmlDefAttrs *xmlDefAttrsPtr;
1096struct _xmlDefAttrs {
1097 int nbAttrs; /* number of defaulted attributes on that element */
1098 int maxAttrs; /* the size of the array */
Nick Wellnhofer47496722017-05-31 16:46:39 +02001099#if __STDC_VERSION__ >= 199901L
1100 /* Using a C99 flexible array member avoids UBSan errors. */
1101 const xmlChar *values[]; /* array of localname/prefix/values/external */
1102#else
1103 const xmlChar *values[5];
1104#endif
Daniel Veillarde57ec792003-09-10 10:50:59 +00001105};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001106
1107/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001108 * xmlAttrNormalizeSpace:
1109 * @src: the source string
1110 * @dst: the target string
1111 *
1112 * Normalize the space in non CDATA attribute values:
1113 * If the attribute type is not CDATA, then the XML processor MUST further
1114 * process the normalized attribute value by discarding any leading and
1115 * trailing space (#x20) characters, and by replacing sequences of space
1116 * (#x20) characters by a single space (#x20) character.
1117 * Note that the size of dst need to be at least src, and if one doesn't need
1118 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1119 * passing src as dst is just fine.
1120 *
1121 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1122 * is needed.
1123 */
1124static xmlChar *
1125xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1126{
1127 if ((src == NULL) || (dst == NULL))
1128 return(NULL);
1129
1130 while (*src == 0x20) src++;
1131 while (*src != 0) {
1132 if (*src == 0x20) {
1133 while (*src == 0x20) src++;
1134 if (*src != 0)
1135 *dst++ = 0x20;
1136 } else {
1137 *dst++ = *src++;
1138 }
1139 }
1140 *dst = 0;
1141 if (dst == src)
1142 return(NULL);
1143 return(dst);
1144}
1145
1146/**
1147 * xmlAttrNormalizeSpace2:
1148 * @src: the source string
1149 *
1150 * Normalize the space in non CDATA attribute values, a slightly more complex
1151 * front end to avoid allocation problems when running on attribute values
1152 * coming from the input.
1153 *
1154 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1155 * is needed.
1156 */
1157static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001158xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001159{
1160 int i;
1161 int remove_head = 0;
1162 int need_realloc = 0;
1163 const xmlChar *cur;
1164
1165 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1166 return(NULL);
1167 i = *len;
1168 if (i <= 0)
1169 return(NULL);
1170
1171 cur = src;
1172 while (*cur == 0x20) {
1173 cur++;
1174 remove_head++;
1175 }
1176 while (*cur != 0) {
1177 if (*cur == 0x20) {
1178 cur++;
1179 if ((*cur == 0x20) || (*cur == 0)) {
1180 need_realloc = 1;
1181 break;
1182 }
1183 } else
1184 cur++;
1185 }
1186 if (need_realloc) {
1187 xmlChar *ret;
1188
1189 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1190 if (ret == NULL) {
1191 xmlErrMemory(ctxt, NULL);
1192 return(NULL);
1193 }
1194 xmlAttrNormalizeSpace(ret, ret);
1195 *len = (int) strlen((const char *)ret);
1196 return(ret);
1197 } else if (remove_head) {
1198 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001199 memmove(src, src + remove_head, 1 + *len);
1200 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001201 }
1202 return(NULL);
1203}
1204
1205/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001206 * xmlAddDefAttrs:
1207 * @ctxt: an XML parser context
1208 * @fullname: the element fullname
1209 * @fullattr: the attribute fullname
1210 * @value: the attribute value
1211 *
1212 * Add a defaulted attribute for an element
1213 */
1214static void
1215xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1216 const xmlChar *fullname,
1217 const xmlChar *fullattr,
1218 const xmlChar *value) {
1219 xmlDefAttrsPtr defaults;
1220 int len;
1221 const xmlChar *name;
1222 const xmlChar *prefix;
1223
Daniel Veillard6a31b832008-03-26 14:06:44 +00001224 /*
1225 * Allows to detect attribute redefinitions
1226 */
1227 if (ctxt->attsSpecial != NULL) {
1228 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1229 return;
1230 }
1231
Daniel Veillarde57ec792003-09-10 10:50:59 +00001232 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001233 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001234 if (ctxt->attsDefault == NULL)
1235 goto mem_error;
1236 }
1237
1238 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001239 * split the element name into prefix:localname , the string found
1240 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001241 */
1242 name = xmlSplitQName3(fullname, &len);
1243 if (name == NULL) {
1244 name = xmlDictLookup(ctxt->dict, fullname, -1);
1245 prefix = NULL;
1246 } else {
1247 name = xmlDictLookup(ctxt->dict, name, -1);
1248 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1249 }
1250
1251 /*
1252 * make sure there is some storage
1253 */
1254 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1255 if (defaults == NULL) {
1256 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001257 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001258 if (defaults == NULL)
1259 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001260 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001261 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001262 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1263 defaults, NULL) < 0) {
1264 xmlFree(defaults);
1265 goto mem_error;
1266 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001267 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001268 xmlDefAttrsPtr temp;
1269
1270 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001271 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001272 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001273 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001274 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001275 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001276 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1277 defaults, NULL) < 0) {
1278 xmlFree(defaults);
1279 goto mem_error;
1280 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001281 }
1282
1283 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001284 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001285 * are within the DTD and hen not associated to namespace names.
1286 */
1287 name = xmlSplitQName3(fullattr, &len);
1288 if (name == NULL) {
1289 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1290 prefix = NULL;
1291 } else {
1292 name = xmlDictLookup(ctxt->dict, name, -1);
1293 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1294 }
1295
Daniel Veillardae0765b2008-07-31 19:54:59 +00001296 defaults->values[5 * defaults->nbAttrs] = name;
1297 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001298 /* intern the string and precompute the end */
1299 len = xmlStrlen(value);
1300 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001301 defaults->values[5 * defaults->nbAttrs + 2] = value;
1302 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1303 if (ctxt->external)
1304 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1305 else
1306 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001307 defaults->nbAttrs++;
1308
1309 return;
1310
1311mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001312 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001313 return;
1314}
1315
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001316/**
1317 * xmlAddSpecialAttr:
1318 * @ctxt: an XML parser context
1319 * @fullname: the element fullname
1320 * @fullattr: the attribute fullname
1321 * @type: the attribute type
1322 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001323 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001324 */
1325static void
1326xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1327 const xmlChar *fullname,
1328 const xmlChar *fullattr,
1329 int type)
1330{
1331 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001332 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001333 if (ctxt->attsSpecial == NULL)
1334 goto mem_error;
1335 }
1336
Daniel Veillardac4118d2008-01-11 05:27:32 +00001337 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1338 return;
1339
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001340 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
Nick Wellnhoferd422b952017-10-09 13:37:42 +02001341 (void *) (ptrdiff_t) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001342 return;
1343
1344mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001345 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001346 return;
1347}
1348
Daniel Veillard4432df22003-09-28 18:58:27 +00001349/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001350 * xmlCleanSpecialAttrCallback:
1351 *
1352 * Removes CDATA attributes from the special attribute table
1353 */
1354static void
1355xmlCleanSpecialAttrCallback(void *payload, void *data,
1356 const xmlChar *fullname, const xmlChar *fullattr,
1357 const xmlChar *unused ATTRIBUTE_UNUSED) {
1358 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1359
Nick Wellnhoferd422b952017-10-09 13:37:42 +02001360 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001361 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1362 }
1363}
1364
1365/**
1366 * xmlCleanSpecialAttr:
1367 * @ctxt: an XML parser context
1368 *
1369 * Trim the list of attributes defined to remove all those of type
1370 * CDATA as they are not special. This call should be done when finishing
1371 * to parse the DTD and before starting to parse the document root.
1372 */
1373static void
1374xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1375{
1376 if (ctxt->attsSpecial == NULL)
1377 return;
1378
1379 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1380
1381 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1382 xmlHashFree(ctxt->attsSpecial, NULL);
1383 ctxt->attsSpecial = NULL;
1384 }
1385 return;
1386}
1387
1388/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001389 * xmlCheckLanguageID:
1390 * @lang: pointer to the string value
1391 *
1392 * Checks that the value conforms to the LanguageID production:
1393 *
1394 * NOTE: this is somewhat deprecated, those productions were removed from
1395 * the XML Second edition.
1396 *
1397 * [33] LanguageID ::= Langcode ('-' Subcode)*
1398 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1399 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1400 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1401 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1402 * [38] Subcode ::= ([a-z] | [A-Z])+
1403 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001404 * The current REC reference the successors of RFC 1766, currently 5646
Daniel Veillard60587d62010-11-04 15:16:27 +01001405 *
1406 * http://www.rfc-editor.org/rfc/rfc5646.txt
1407 * langtag = language
1408 * ["-" script]
1409 * ["-" region]
1410 * *("-" variant)
1411 * *("-" extension)
1412 * ["-" privateuse]
1413 * language = 2*3ALPHA ; shortest ISO 639 code
1414 * ["-" extlang] ; sometimes followed by
1415 * ; extended language subtags
1416 * / 4ALPHA ; or reserved for future use
1417 * / 5*8ALPHA ; or registered language subtag
1418 *
1419 * extlang = 3ALPHA ; selected ISO 639 codes
1420 * *2("-" 3ALPHA) ; permanently reserved
1421 *
1422 * script = 4ALPHA ; ISO 15924 code
1423 *
1424 * region = 2ALPHA ; ISO 3166-1 code
1425 * / 3DIGIT ; UN M.49 code
1426 *
1427 * variant = 5*8alphanum ; registered variants
1428 * / (DIGIT 3alphanum)
1429 *
1430 * extension = singleton 1*("-" (2*8alphanum))
1431 *
1432 * ; Single alphanumerics
1433 * ; "x" reserved for private use
1434 * singleton = DIGIT ; 0 - 9
1435 * / %x41-57 ; A - W
1436 * / %x59-5A ; Y - Z
1437 * / %x61-77 ; a - w
1438 * / %x79-7A ; y - z
1439 *
1440 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1441 * The parser below doesn't try to cope with extension or privateuse
1442 * that could be added but that's not interoperable anyway
1443 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001444 * Returns 1 if correct 0 otherwise
1445 **/
1446int
1447xmlCheckLanguageID(const xmlChar * lang)
1448{
Daniel Veillard60587d62010-11-04 15:16:27 +01001449 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001450
1451 if (cur == NULL)
1452 return (0);
1453 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001454 ((cur[0] == 'I') && (cur[1] == '-')) ||
1455 ((cur[0] == 'x') && (cur[1] == '-')) ||
1456 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001457 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001458 * Still allow IANA code and user code which were coming
1459 * from the previous version of the XML-1.0 specification
1460 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001461 */
1462 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001463 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001464 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1465 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001466 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001467 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001468 nxt = cur;
1469 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1470 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1471 nxt++;
1472 if (nxt - cur >= 4) {
1473 /*
1474 * Reserved
1475 */
1476 if ((nxt - cur > 8) || (nxt[0] != 0))
1477 return(0);
1478 return(1);
1479 }
1480 if (nxt - cur < 2)
1481 return(0);
1482 /* we got an ISO 639 code */
1483 if (nxt[0] == 0)
1484 return(1);
1485 if (nxt[0] != '-')
1486 return(0);
1487
1488 nxt++;
1489 cur = nxt;
1490 /* now we can have extlang or script or region or variant */
1491 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1492 goto region_m49;
1493
1494 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1495 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1496 nxt++;
1497 if (nxt - cur == 4)
1498 goto script;
1499 if (nxt - cur == 2)
1500 goto region;
1501 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1502 goto variant;
1503 if (nxt - cur != 3)
1504 return(0);
1505 /* we parsed an extlang */
1506 if (nxt[0] == 0)
1507 return(1);
1508 if (nxt[0] != '-')
1509 return(0);
1510
1511 nxt++;
1512 cur = nxt;
1513 /* now we can have script or region or variant */
1514 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515 goto region_m49;
1516
1517 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519 nxt++;
1520 if (nxt - cur == 2)
1521 goto region;
1522 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1523 goto variant;
1524 if (nxt - cur != 4)
1525 return(0);
1526 /* we parsed a script */
1527script:
1528 if (nxt[0] == 0)
1529 return(1);
1530 if (nxt[0] != '-')
1531 return(0);
1532
1533 nxt++;
1534 cur = nxt;
1535 /* now we can have region or variant */
1536 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1537 goto region_m49;
1538
1539 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1540 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1541 nxt++;
1542
1543 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1544 goto variant;
1545 if (nxt - cur != 2)
1546 return(0);
1547 /* we parsed a region */
1548region:
1549 if (nxt[0] == 0)
1550 return(1);
1551 if (nxt[0] != '-')
1552 return(0);
1553
1554 nxt++;
1555 cur = nxt;
1556 /* now we can just have a variant */
1557 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1558 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1559 nxt++;
1560
1561 if ((nxt - cur < 5) || (nxt - cur > 8))
1562 return(0);
1563
1564 /* we parsed a variant */
1565variant:
1566 if (nxt[0] == 0)
1567 return(1);
1568 if (nxt[0] != '-')
1569 return(0);
1570 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001571 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001572
1573region_m49:
1574 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1575 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1576 nxt += 3;
1577 goto region;
1578 }
1579 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001580}
1581
Owen Taylor3473f882001-02-23 17:55:21 +00001582/************************************************************************
1583 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001584 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001585 * *
1586 ************************************************************************/
1587
Daniel Veillard8ed10722009-08-20 19:17:36 +02001588static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1589 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001590
Daniel Veillard0fb18932003-09-07 09:14:37 +00001591#ifdef SAX2
1592/**
1593 * nsPush:
1594 * @ctxt: an XML parser context
1595 * @prefix: the namespace prefix or NULL
1596 * @URL: the namespace name
1597 *
1598 * Pushes a new parser namespace on top of the ns stack
1599 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001600 * Returns -1 in case of error, -2 if the namespace should be discarded
1601 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001602 */
1603static int
1604nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1605{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001606 if (ctxt->options & XML_PARSE_NSCLEAN) {
1607 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001608 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001609 if (ctxt->nsTab[i] == prefix) {
1610 /* in scope */
1611 if (ctxt->nsTab[i + 1] == URL)
1612 return(-2);
1613 /* out of scope keep it */
1614 break;
1615 }
1616 }
1617 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001618 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1619 ctxt->nsMax = 10;
1620 ctxt->nsNr = 0;
1621 ctxt->nsTab = (const xmlChar **)
1622 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1623 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001624 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001625 ctxt->nsMax = 0;
1626 return (-1);
1627 }
1628 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001629 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001630 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001631 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1632 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1633 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001634 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001635 ctxt->nsMax /= 2;
1636 return (-1);
1637 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001638 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001639 }
1640 ctxt->nsTab[ctxt->nsNr++] = prefix;
1641 ctxt->nsTab[ctxt->nsNr++] = URL;
1642 return (ctxt->nsNr);
1643}
1644/**
1645 * nsPop:
1646 * @ctxt: an XML parser context
1647 * @nr: the number to pop
1648 *
1649 * Pops the top @nr parser prefix/namespace from the ns stack
1650 *
1651 * Returns the number of namespaces removed
1652 */
1653static int
1654nsPop(xmlParserCtxtPtr ctxt, int nr)
1655{
1656 int i;
1657
1658 if (ctxt->nsTab == NULL) return(0);
1659 if (ctxt->nsNr < nr) {
1660 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1661 nr = ctxt->nsNr;
1662 }
1663 if (ctxt->nsNr <= 0)
1664 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001665
Daniel Veillard0fb18932003-09-07 09:14:37 +00001666 for (i = 0;i < nr;i++) {
1667 ctxt->nsNr--;
1668 ctxt->nsTab[ctxt->nsNr] = NULL;
1669 }
1670 return(nr);
1671}
1672#endif
1673
1674static int
1675xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1676 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001677 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001678 int maxatts;
1679
1680 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001681 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001682 atts = (const xmlChar **)
1683 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001684 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001685 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001686 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1687 if (attallocs == NULL) goto mem_error;
1688 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001689 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001690 } else if (nr + 5 > ctxt->maxatts) {
1691 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001692 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1693 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001694 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001695 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001696 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1697 (maxatts / 5) * sizeof(int));
1698 if (attallocs == NULL) goto mem_error;
1699 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001700 ctxt->maxatts = maxatts;
1701 }
1702 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001703mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001704 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001705 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001706}
1707
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001708/**
1709 * inputPush:
1710 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001711 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001712 *
1713 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001714 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001715 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001716 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001717int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001718inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1719{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001720 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001721 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001722 if (ctxt->inputNr >= ctxt->inputMax) {
1723 ctxt->inputMax *= 2;
1724 ctxt->inputTab =
1725 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1726 ctxt->inputMax *
1727 sizeof(ctxt->inputTab[0]));
1728 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001729 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001730 xmlFreeInputStream(value);
1731 ctxt->inputMax /= 2;
1732 value = NULL;
1733 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001734 }
1735 }
1736 ctxt->inputTab[ctxt->inputNr] = value;
1737 ctxt->input = value;
1738 return (ctxt->inputNr++);
1739}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001740/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001741 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001742 * @ctxt: an XML parser context
1743 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001744 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001745 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001746 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001747 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001748xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001749inputPop(xmlParserCtxtPtr ctxt)
1750{
1751 xmlParserInputPtr ret;
1752
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001753 if (ctxt == NULL)
1754 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001755 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001756 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001757 ctxt->inputNr--;
1758 if (ctxt->inputNr > 0)
1759 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1760 else
1761 ctxt->input = NULL;
1762 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001763 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001764 return (ret);
1765}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001766/**
1767 * nodePush:
1768 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001769 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001770 *
1771 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001772 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001773 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001774 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001775int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001776nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1777{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001778 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001779 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001780 xmlNodePtr *tmp;
1781
1782 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1783 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001784 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001785 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001786 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001787 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001788 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001789 ctxt->nodeTab = tmp;
1790 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001791 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001792 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1793 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001794 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001795 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001796 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001797 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001798 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001799 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001800 ctxt->nodeTab[ctxt->nodeNr] = value;
1801 ctxt->node = value;
1802 return (ctxt->nodeNr++);
1803}
Daniel Veillard8915c152008-08-26 13:05:34 +00001804
Daniel Veillard1c732d22002-11-30 11:22:59 +00001805/**
1806 * nodePop:
1807 * @ctxt: an XML parser context
1808 *
1809 * Pops the top element node from the node stack
1810 *
1811 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001812 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001813xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001814nodePop(xmlParserCtxtPtr ctxt)
1815{
1816 xmlNodePtr ret;
1817
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001818 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001819 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001820 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001821 ctxt->nodeNr--;
1822 if (ctxt->nodeNr > 0)
1823 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1824 else
1825 ctxt->node = NULL;
1826 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001827 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001828 return (ret);
1829}
Daniel Veillarda2351322004-06-27 12:08:10 +00001830
Daniel Veillard1c732d22002-11-30 11:22:59 +00001831/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001832 * nameNsPush:
1833 * @ctxt: an XML parser context
1834 * @value: the element name
1835 * @prefix: the element prefix
1836 * @URI: the element namespace name
1837 *
1838 * Pushes a new element name/prefix/URL on top of the name stack
1839 *
1840 * Returns -1 in case of error, the index in the stack otherwise
1841 */
1842static int
1843nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1844 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1845{
1846 if (ctxt->nameNr >= ctxt->nameMax) {
1847 const xmlChar * *tmp;
1848 void **tmp2;
1849 ctxt->nameMax *= 2;
1850 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1851 ctxt->nameMax *
1852 sizeof(ctxt->nameTab[0]));
1853 if (tmp == NULL) {
1854 ctxt->nameMax /= 2;
1855 goto mem_error;
1856 }
1857 ctxt->nameTab = tmp;
1858 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1859 ctxt->nameMax * 3 *
1860 sizeof(ctxt->pushTab[0]));
1861 if (tmp2 == NULL) {
1862 ctxt->nameMax /= 2;
1863 goto mem_error;
1864 }
1865 ctxt->pushTab = tmp2;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001866 } else if (ctxt->pushTab == NULL) {
1867 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
1868 sizeof(ctxt->pushTab[0]));
1869 if (ctxt->pushTab == NULL)
1870 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001871 }
1872 ctxt->nameTab[ctxt->nameNr] = value;
1873 ctxt->name = value;
1874 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1875 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Nick Wellnhoferd422b952017-10-09 13:37:42 +02001876 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001877 return (ctxt->nameNr++);
1878mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001879 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001880 return (-1);
1881}
Haibo Huangcfd91dc2020-07-30 23:01:33 -07001882#ifdef LIBXML_PUSH_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001883/**
1884 * nameNsPop:
1885 * @ctxt: an XML parser context
1886 *
1887 * Pops the top element/prefix/URI name from the name stack
1888 *
1889 * Returns the name just removed
1890 */
1891static const xmlChar *
1892nameNsPop(xmlParserCtxtPtr ctxt)
1893{
1894 const xmlChar *ret;
1895
1896 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001897 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001898 ctxt->nameNr--;
1899 if (ctxt->nameNr > 0)
1900 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1901 else
1902 ctxt->name = NULL;
1903 ret = ctxt->nameTab[ctxt->nameNr];
1904 ctxt->nameTab[ctxt->nameNr] = NULL;
1905 return (ret);
1906}
Daniel Veillarda2351322004-06-27 12:08:10 +00001907#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001908
1909/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001910 * namePush:
1911 * @ctxt: an XML parser context
1912 * @value: the element name
1913 *
1914 * Pushes a new element name on top of the name stack
1915 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001916 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001917 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001918int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001919namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001920{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001921 if (ctxt == NULL) return (-1);
1922
Daniel Veillard1c732d22002-11-30 11:22:59 +00001923 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001924 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001925 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001926 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001927 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001928 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001929 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001930 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001931 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001932 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001933 }
1934 ctxt->nameTab[ctxt->nameNr] = value;
1935 ctxt->name = value;
1936 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001937mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001938 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001939 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001940}
1941/**
1942 * namePop:
1943 * @ctxt: an XML parser context
1944 *
1945 * Pops the top element name from the name stack
1946 *
1947 * Returns the name just removed
1948 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001949const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001950namePop(xmlParserCtxtPtr ctxt)
1951{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001952 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001953
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001954 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1955 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001956 ctxt->nameNr--;
1957 if (ctxt->nameNr > 0)
1958 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1959 else
1960 ctxt->name = NULL;
1961 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001962 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001963 return (ret);
1964}
Owen Taylor3473f882001-02-23 17:55:21 +00001965
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001966static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001967 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001968 int *tmp;
1969
Owen Taylor3473f882001-02-23 17:55:21 +00001970 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001971 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1972 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1973 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001974 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001975 ctxt->spaceMax /=2;
1976 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001977 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001978 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001979 }
1980 ctxt->spaceTab[ctxt->spaceNr] = val;
1981 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1982 return(ctxt->spaceNr++);
1983}
1984
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001985static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001986 int ret;
1987 if (ctxt->spaceNr <= 0) return(0);
1988 ctxt->spaceNr--;
1989 if (ctxt->spaceNr > 0)
1990 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1991 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001992 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001993 ret = ctxt->spaceTab[ctxt->spaceNr];
1994 ctxt->spaceTab[ctxt->spaceNr] = -1;
1995 return(ret);
1996}
1997
1998/*
1999 * Macros for accessing the content. Those should be used only by the parser,
2000 * and not exported.
2001 *
2002 * Dirty macros, i.e. one often need to make assumption on the context to
2003 * use them
2004 *
2005 * CUR_PTR return the current pointer to the xmlChar to be parsed.
2006 * To be used with extreme caution since operations consuming
2007 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser only to
 *           compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
2012 * RAW same as CUR but in the input buffer, bypass any token
2013 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used
 *           only to compare against ASCII based substrings.
2016 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00002017 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1
 *           non-newline ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00002020 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2021 *
2022 * NEXT Skip to the next character, this does the proper decoding
2023 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00002024 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00002025 * CUR_CHAR(l) returns the current unicode character (int), set l
2026 * to the number of xmlChars used for the encoding [0-5].
2027 * CUR_SCHAR same but operate on a string instead of the context
2028 * COPY_BUF copy the current unicode char to the target buffer, increment
2029 * the index
2030 * GROW, SHRINK handling of input buffers
2031 */
2032
/*
 * Raw accessors on the current input of the parser context; they all
 * expect a variable named ctxt to be in scope at the point of use.
 * RAW and CUR expand to the same expression: the xmlChar at the
 * current input position.
 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00002033#define RAW (*ctxt->input->cur)
2034#define CUR (*ctxt->input->cur)
/* NXT(val): the xmlChar located val positions after the current one */
Owen Taylor3473f882001-02-23 17:55:21 +00002035#define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR / BASE_PTR: the cur and base pointers of the current input */
2036#define CUR_PTR ctxt->input->cur
Pranjal Jumde45752d22016-03-03 11:50:34 -08002037#define BASE_PTR ctxt->input->base
Owen Taylor3473f882001-02-23 17:55:21 +00002038
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are equal to
 * c1 ... cn, compared as unsigned chars from left to right and stopping
 * at the first mismatch.
 *
 * Every argument is parenthesized so that expressions can be passed
 * safely, and the cast keeps the const qualifier since the bytes are
 * only read. s is evaluated once per compared byte, so it must not
 * have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2056
/*
 * SKIP(val): move the current input position forward by val xmlChars,
 * adding val to the character count and to the column as well. The
 * line number is never touched. When the new position holds a zero
 * byte, more input is requested with xmlParserInputGrow().
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  } while (0)
2062
/*
 * SKIPL(val): move the current input position forward by val xmlChars
 * one at a time. A '\n' bumps the line number and resets the column to
 * 1, any other xmlChar bumps the column; the character count is bumped
 * in both cases. When the final position holds a zero byte, more input
 * is requested with xmlParserInputGrow().
 *
 * val is parenthesized so that an expression argument cannot bind to
 * the '<' of the loop test; it is evaluated on every iteration.
 */
#define SKIPL(val) do { \
    int skipl; \
    for(skipl=0; skipl<(val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  } while (0)
2075
Daniel Veillarda880b122003-04-21 21:36:41 +00002076#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002077 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2078 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002079 xmlSHRINK (ctxt);
2080
2081static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2082 xmlParserInputShrink(ctxt->input);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002083 if (*ctxt->input->cur == 0)
2084 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2085}
Owen Taylor3473f882001-02-23 17:55:21 +00002086
Daniel Veillarda880b122003-04-21 21:36:41 +00002087#define GROW if ((ctxt->progressive == 0) && \
2088 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002089 xmlGROW (ctxt);
2090
2091static void xmlGROW (xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002092 ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2093 ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
Longstreth Jon190a0b82014-02-06 10:58:17 +01002094
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002095 if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2096 (curBase > XML_MAX_LOOKUP_LIMIT)) &&
Vlad Tsyrklevich28f52fe2017-08-10 15:08:48 -07002097 ((ctxt->input->buf) &&
2098 (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002099 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2100 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002101 xmlHaltParser(ctxt);
2102 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002103 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002104 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002105 if ((ctxt->input->cur > ctxt->input->end) ||
2106 (ctxt->input->cur < ctxt->input->base)) {
2107 xmlHaltParser(ctxt);
2108 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2109 return;
2110 }
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002111 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2112 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillarda880b122003-04-21 21:36:41 +00002113}
Owen Taylor3473f882001-02-23 17:55:21 +00002114
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the ctxt in scope */
2115#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2116
/* NEXT: shorthand for xmlNextChar() on the ctxt in scope */
2117#define NEXT xmlNextChar(ctxt)
2118
/*
 * NEXT1: advance the current input position by exactly one xmlChar,
 * bumping the column and the character count but never the line
 * number. When the new position holds a zero byte, more input is
 * requested with xmlParserInputGrow(). The expansion is a bare brace
 * block, not a do { } while (0) statement.
 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00002119#define NEXT1 { \
Daniel Veillard77a90a72003-03-22 00:04:05 +00002120 ctxt->input->col++; \
Daniel Veillard21a0f912001-02-25 19:54:14 +00002121 ctxt->input->cur++; \
2122 ctxt->nbChars++; \
Daniel Veillard561b7f82002-03-20 21:55:57 +00002123 if (*ctxt->input->cur == 0) \
Daniel Veillard21a0f912001-02-25 19:54:14 +00002124 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
2125 }
2126
Owen Taylor3473f882001-02-23 17:55:21 +00002127#define NEXTL(l) do { \
2128 if (*(ctxt->input->cur) == '\n') { \
2129 ctxt->input->line++; ctxt->input->col = 1; \
2130 } else ctxt->input->col++; \
Daniel Veillardfdc91562002-07-01 21:52:03 +00002131 ctxt->input->cur += l; \
Owen Taylor3473f882001-02-23 17:55:21 +00002132 } while (0)
2133
2134#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2135#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2136
2137#define COPY_BUF(l,b,i,v) \
2138 if (l == 1) b[i++] = (xmlChar) v; \
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002139 else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00002140
2141/**
2142 * xmlSkipBlankChars:
2143 * @ctxt: the XML parser context
2144 *
2145 * skip all blanks character found at that point in the input streams.
2146 * It pops up finished entities in the process if allowable at that point.
2147 *
2148 * Returns the number of space chars skipped
2149 */
2150
2151int
2152xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002153 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002154
2155 /*
2156 * It's Okay to use CUR/NEXT here since all the blanks are on
2157 * the ASCII range.
2158 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002159 if (ctxt->instate != XML_PARSER_DTD) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002160 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002161 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002162 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002163 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002164 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002165 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002166 if (*cur == '\n') {
2167 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002168 } else {
2169 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002170 }
2171 cur++;
2172 res++;
2173 if (*cur == 0) {
2174 ctxt->input->cur = cur;
2175 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2176 cur = ctxt->input->cur;
2177 }
2178 }
2179 ctxt->input->cur = cur;
2180 } else {
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002181 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2182
2183 while (1) {
2184 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002185 NEXT;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002186 } else if (CUR == '%') {
2187 /*
2188 * Need to handle support of entities branching here
2189 */
2190 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2191 break;
2192 xmlParsePEReference(ctxt);
2193 } else if (CUR == 0) {
2194 if (ctxt->inputNr <= 1)
2195 break;
2196 xmlPopInput(ctxt);
2197 } else {
2198 break;
2199 }
Nick Wellnhofer872fea92017-06-19 00:24:12 +02002200
2201 /*
2202 * Also increase the counter when entering or exiting a PERef.
2203 * The spec says: "When a parameter-entity reference is recognized
2204 * in the DTD and included, its replacement text MUST be enlarged
2205 * by the attachment of one leading and one following space (#x20)
2206 * character."
2207 */
2208 res++;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002209 }
Daniel Veillard02141ea2001-04-30 11:46:40 +00002210 }
Owen Taylor3473f882001-02-23 17:55:21 +00002211 return(res);
2212}
2213
/************************************************************************
 *									*
 *		Convenience functions to handle entities		*
 *									*
 ************************************************************************/
2219
2220/**
2221 * xmlPopInput:
2222 * @ctxt: an XML parser context
2223 *
2224 * xmlPopInput: the current input pointed by ctxt->input came to an end
2225 * pop it and return the next char.
2226 *
2227 * Returns the current xmlChar in the parser context
2228 */
2229xmlChar
2230xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002231 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002232 if (xmlParserDebugEntities)
2233 xmlGenericError(xmlGenericErrorContext,
2234 "Popping input %d\n", ctxt->inputNr);
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02002235 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2236 (ctxt->instate != XML_PARSER_EOF))
2237 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2238 "Unfinished entity outside the DTD");
Owen Taylor3473f882001-02-23 17:55:21 +00002239 xmlFreeInputStream(inputPop(ctxt));
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002240 if (*ctxt->input->cur == 0)
2241 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00002242 return(CUR);
2243}
2244
2245/**
2246 * xmlPushInput:
2247 * @ctxt: an XML parser context
2248 * @input: an XML parser input fragment (entity, XML fragment ...).
2249 *
2250 * xmlPushInput: switch to a new input stream which is stacked on top
2251 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002252 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002253 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002254int
Owen Taylor3473f882001-02-23 17:55:21 +00002255xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002256 int ret;
2257 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002258
2259 if (xmlParserDebugEntities) {
2260 if ((ctxt->input != NULL) && (ctxt->input->filename))
2261 xmlGenericError(xmlGenericErrorContext,
2262 "%s(%d): ", ctxt->input->filename,
2263 ctxt->input->line);
2264 xmlGenericError(xmlGenericErrorContext,
2265 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2266 }
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02002267 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2268 (ctxt->inputNr > 1024)) {
2269 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2270 while (ctxt->inputNr > 1)
2271 xmlFreeInputStream(inputPop(ctxt));
2272 return(-1);
2273 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002274 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002275 if (ctxt->instate == XML_PARSER_EOF)
2276 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002277 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002278 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002279}
2280
2281/**
2282 * xmlParseCharRef:
2283 * @ctxt: an XML parser context
2284 *
2285 * parse Reference declarations
2286 *
2287 * [66] CharRef ::= '&#' [0-9]+ ';' |
2288 * '&#x' [0-9a-fA-F]+ ';'
2289 *
2290 * [ WFC: Legal Character ]
2291 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002292 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002293 *
2294 * Returns the value parsed (as an int), 0 in case of error
2295 */
2296int
2297xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002298 int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002299 int count = 0;
2300
Owen Taylor3473f882001-02-23 17:55:21 +00002301 /*
2302 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2303 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002304 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002305 (NXT(2) == 'x')) {
2306 SKIP(3);
2307 GROW;
2308 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002309 if (count++ > 20) {
2310 count = 0;
2311 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002312 if (ctxt->instate == XML_PARSER_EOF)
2313 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002314 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002315 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002316 val = val * 16 + (CUR - '0');
2317 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2318 val = val * 16 + (CUR - 'a') + 10;
2319 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2320 val = val * 16 + (CUR - 'A') + 10;
2321 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002322 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002323 val = 0;
2324 break;
2325 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002326 if (val > 0x110000)
2327 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002328
Owen Taylor3473f882001-02-23 17:55:21 +00002329 NEXT;
2330 count++;
2331 }
2332 if (RAW == ';') {
2333 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002334 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002335 ctxt->nbChars ++;
2336 ctxt->input->cur++;
2337 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002338 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002339 SKIP(2);
2340 GROW;
2341 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002342 if (count++ > 20) {
2343 count = 0;
2344 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002345 if (ctxt->instate == XML_PARSER_EOF)
2346 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002347 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002348 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002349 val = val * 10 + (CUR - '0');
2350 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002351 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002352 val = 0;
2353 break;
2354 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002355 if (val > 0x110000)
2356 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002357
Owen Taylor3473f882001-02-23 17:55:21 +00002358 NEXT;
2359 count++;
2360 }
2361 if (RAW == ';') {
2362 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002363 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002364 ctxt->nbChars ++;
2365 ctxt->input->cur++;
2366 }
2367 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002368 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002369 }
2370
2371 /*
2372 * [ WFC: Legal Character ]
2373 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002374 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002375 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002376 if (val >= 0x110000) {
2377 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2378 "xmlParseCharRef: character reference out of bounds\n",
2379 val);
2380 } else if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002381 return(val);
2382 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002383 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2384 "xmlParseCharRef: invalid xmlChar value %d\n",
2385 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002386 }
2387 return(0);
2388}
2389
2390/**
2391 * xmlParseStringCharRef:
2392 * @ctxt: an XML parser context
2393 * @str: a pointer to an index in the string
2394 *
2395 * parse Reference declarations, variant parsing from a string rather
2396 * than an an input flow.
2397 *
2398 * [66] CharRef ::= '&#' [0-9]+ ';' |
2399 * '&#x' [0-9a-fA-F]+ ';'
2400 *
2401 * [ WFC: Legal Character ]
2402 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002403 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002404 *
2405 * Returns the value parsed (as an int), 0 in case of error, str will be
2406 * updated to the current value of the index
2407 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002408static int
Owen Taylor3473f882001-02-23 17:55:21 +00002409xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2410 const xmlChar *ptr;
2411 xmlChar cur;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002412 int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002413
2414 if ((str == NULL) || (*str == NULL)) return(0);
2415 ptr = *str;
2416 cur = *ptr;
2417 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2418 ptr += 3;
2419 cur = *ptr;
2420 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002421 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002422 val = val * 16 + (cur - '0');
2423 else if ((cur >= 'a') && (cur <= 'f'))
2424 val = val * 16 + (cur - 'a') + 10;
2425 else if ((cur >= 'A') && (cur <= 'F'))
2426 val = val * 16 + (cur - 'A') + 10;
2427 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002428 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002429 val = 0;
2430 break;
2431 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002432 if (val > 0x110000)
2433 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002434
Owen Taylor3473f882001-02-23 17:55:21 +00002435 ptr++;
2436 cur = *ptr;
2437 }
2438 if (cur == ';')
2439 ptr++;
2440 } else if ((cur == '&') && (ptr[1] == '#')){
2441 ptr += 2;
2442 cur = *ptr;
2443 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002444 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002445 val = val * 10 + (cur - '0');
2446 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002447 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002448 val = 0;
2449 break;
2450 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002451 if (val > 0x110000)
2452 val = 0x110000;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002453
Owen Taylor3473f882001-02-23 17:55:21 +00002454 ptr++;
2455 cur = *ptr;
2456 }
2457 if (cur == ';')
2458 ptr++;
2459 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002460 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002461 return(0);
2462 }
2463 *str = ptr;
2464
2465 /*
2466 * [ WFC: Legal Character ]
2467 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002468 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002469 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002470 if (val >= 0x110000) {
2471 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2472 "xmlParseStringCharRef: character reference out of bounds\n",
2473 val);
2474 } else if (IS_CHAR(val)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002475 return(val);
2476 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002477 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2478 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2479 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002480 }
2481 return(0);
2482}
2483
2484/**
2485 * xmlParserHandlePEReference:
2486 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002487 *
Owen Taylor3473f882001-02-23 17:55:21 +00002488 * [69] PEReference ::= '%' Name ';'
2489 *
2490 * [ WFC: No Recursion ]
2491 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002492 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002493 *
2494 * [ WFC: Entity Declared ]
2495 * In a document without any DTD, a document with only an internal DTD
2496 * subset which contains no parameter entity references, or a document
2497 * with "standalone='yes'", ... ... The declaration of a parameter
2498 * entity must precede any reference to it...
2499 *
2500 * [ VC: Entity Declared ]
2501 * In a document with an external subset or external parameter entities
2502 * with "standalone='no'", ... ... The declaration of a parameter entity
2503 * must precede any reference to it...
2504 *
2505 * [ WFC: In DTD ]
2506 * Parameter-entity references may only appear in the DTD.
2507 * NOTE: misleading but this is handled.
2508 *
2509 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002510 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002511 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002512 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002513 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002514 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002515 */
2516void
2517xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00002518 switch(ctxt->instate) {
2519 case XML_PARSER_CDATA_SECTION:
2520 return;
2521 case XML_PARSER_COMMENT:
2522 return;
2523 case XML_PARSER_START_TAG:
2524 return;
2525 case XML_PARSER_END_TAG:
2526 return;
2527 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002528 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002529 return;
2530 case XML_PARSER_PROLOG:
2531 case XML_PARSER_START:
2532 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002533 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002534 return;
2535 case XML_PARSER_ENTITY_DECL:
2536 case XML_PARSER_CONTENT:
2537 case XML_PARSER_ATTRIBUTE_VALUE:
2538 case XML_PARSER_PI:
2539 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002540 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002541 /* we just ignore it there */
2542 return;
2543 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002544 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002545 return;
2546 case XML_PARSER_ENTITY_VALUE:
2547 /*
2548 * NOTE: in the case of entity values, we don't do the
2549 * substitution here since we need the literal
2550 * entity value to be able to save the internal
2551 * subset of the document.
2552 * This will be handled by xmlStringDecodeEntities
2553 */
2554 return;
2555 case XML_PARSER_DTD:
2556 /*
2557 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2558 * In the internal DTD subset, parameter-entity references
2559 * can occur only where markup declarations can occur, not
2560 * within markup declarations.
2561 * In that case this is handled in xmlParseMarkupDecl
2562 */
2563 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2564 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002565 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002566 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002567 break;
2568 case XML_PARSER_IGNORE:
2569 return;
2570 }
2571
Nick Wellnhofer03904152017-06-05 21:16:00 +02002572 xmlParsePEReference(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00002573}
2574
/*
 * growBuffer(buffer, n): macro used to grow the current buffer to
 * 2 * buffer##_size + n bytes.
 *	buffer##_size is expected to be a size_t variable in scope
 *	mem_error: label expected to handle memory allocation failures; it
 *	           is reached when the new size wraps around or when
 *	           xmlRealloc() fails (buffer is left untouched then)
 * @n is parenthesized so that any expression can be passed safely.
 * Expands to a brace block, not a do/while(0) statement.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2589
2590/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002591 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002592 * @ctxt: the parser context
2593 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002594 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002595 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2596 * @end: an end marker xmlChar, 0 if none
2597 * @end2: an end marker xmlChar, 0 if none
2598 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002599 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002600 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002601 *
2602 * [67] Reference ::= EntityRef | CharRef
2603 *
2604 * [69] PEReference ::= '%' Name ';'
2605 *
2606 * Returns A newly allocated string with the substitution done. The caller
2607 * must deallocate it !
2608 */
2609xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002610xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2611 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002612 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002613 size_t buffer_size = 0;
2614 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002615
2616 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002617 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002618 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002619 xmlEntityPtr ent;
2620 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002621
Daniel Veillarda82b1822004-11-08 16:24:57 +00002622 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002623 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002624 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002625
Daniel Veillard0161e632008-08-28 15:36:32 +00002626 if (((ctxt->depth > 40) &&
2627 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2628 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002629 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002630 return(NULL);
2631 }
2632
2633 /*
2634 * allocate a translation buffer.
2635 */
2636 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002637 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002638 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002639
2640 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002641 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002642 * we are operating on already parsed values.
2643 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002644 if (str < last)
2645 c = CUR_SCHAR(str, l);
2646 else
2647 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002648 while ((c != 0) && (c != end) && /* non input consuming loop */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002649 (c != end2) && (c != end3) &&
2650 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002651
2652 if (c == 0) break;
2653 if ((c == '&') && (str[1] == '#')) {
2654 int val = xmlParseStringCharRef(ctxt, &str);
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002655 if (val == 0)
2656 goto int_error;
2657 COPY_BUF(0,buffer,nbchars,val);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002658 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002659 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002660 }
Owen Taylor3473f882001-02-23 17:55:21 +00002661 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2662 if (xmlParserDebugEntities)
2663 xmlGenericError(xmlGenericErrorContext,
2664 "String decoding Entity Reference: %.30s\n",
2665 str);
2666 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002667 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002668 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002669 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002670 if ((ent != NULL) &&
2671 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2672 if (ent->content != NULL) {
2673 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002674 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002675 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002676 }
Owen Taylor3473f882001-02-23 17:55:21 +00002677 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002678 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2679 "predefined entity has no content\n");
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002680 goto int_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002681 }
2682 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002683 ctxt->depth++;
2684 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2685 0, 0, 0);
2686 ctxt->depth--;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002687 if (rep == NULL)
2688 goto int_error;
Daniel Veillard0161e632008-08-28 15:36:32 +00002689
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002690 current = rep;
2691 while (*current != 0) { /* non input consuming loop */
2692 buffer[nbchars++] = *current++;
2693 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2694 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2695 goto int_error;
2696 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2697 }
2698 }
2699 xmlFree(rep);
2700 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002701 } else if (ent != NULL) {
2702 int i = xmlStrlen(ent->name);
2703 const xmlChar *cur = ent->name;
2704
2705 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002706 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002707 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002708 }
2709 for (;i > 0;i--)
2710 buffer[nbchars++] = *cur++;
2711 buffer[nbchars++] = ';';
2712 }
2713 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2714 if (xmlParserDebugEntities)
2715 xmlGenericError(xmlGenericErrorContext,
2716 "String decoding PE Reference: %.30s\n", str);
2717 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002718 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002719 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002720 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002721 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002722 if (ent->content == NULL) {
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002723 /*
2724 * Note: external parsed entities will not be loaded,
2725 * it is not required for a non-validating parser to
Haibo Huangcfd91dc2020-07-30 23:01:33 -07002726 * complete external PEReferences coming from the
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002727 * internal subset
2728 */
2729 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2730 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2731 (ctxt->validate != 0)) {
2732 xmlLoadEntityContent(ctxt, ent);
2733 } else {
2734 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2735 "not validating will not read content for PE entity %s\n",
2736 ent->name, NULL);
2737 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002738 }
Owen Taylor3473f882001-02-23 17:55:21 +00002739 ctxt->depth++;
2740 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2741 0, 0, 0);
2742 ctxt->depth--;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002743 if (rep == NULL)
2744 goto int_error;
2745 current = rep;
2746 while (*current != 0) { /* non input consuming loop */
2747 buffer[nbchars++] = *current++;
2748 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2749 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2750 goto int_error;
2751 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2752 }
2753 }
2754 xmlFree(rep);
2755 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002756 }
2757 } else {
2758 COPY_BUF(l,buffer,nbchars,c);
2759 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002760 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2761 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002762 }
2763 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002764 if (str < last)
2765 c = CUR_SCHAR(str, l);
2766 else
2767 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002768 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002769 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002770 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002771
2772mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002773 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002774int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002775 if (rep != NULL)
2776 xmlFree(rep);
2777 if (buffer != NULL)
2778 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002779 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002780}
2781
Daniel Veillarde57ec792003-09-10 10:50:59 +00002782/**
2783 * xmlStringDecodeEntities:
2784 * @ctxt: the parser context
2785 * @str: the input string
2786 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2787 * @end: an end marker xmlChar, 0 if none
2788 * @end2: an end marker xmlChar, 0 if none
2789 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002790 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002791 * Takes a entity string content and process to do the adequate substitutions.
2792 *
2793 * [67] Reference ::= EntityRef | CharRef
2794 *
2795 * [69] PEReference ::= '%' Name ';'
2796 *
2797 * Returns A newly allocated string with the substitution done. The caller
2798 * must deallocate it !
2799 */
2800xmlChar *
2801xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2802 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002803 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002804 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2805 end, end2, end3));
2806}
Owen Taylor3473f882001-02-23 17:55:21 +00002807
/************************************************************************
 *									*
 *		Convenience functions, cleanup needed ?			*
 *									*
 ************************************************************************/
2813
2814/**
2815 * areBlanks:
2816 * @ctxt: an XML parser context
2817 * @str: a xmlChar *
2818 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002819 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002820 *
2821 * Is this a sequence of blank chars that one can ignore ?
2822 *
2823 * Returns 1 if ignorable 0 otherwise.
2824 */
2825
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002826static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2827 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002828 int i, ret;
2829 xmlNodePtr lastChild;
2830
Daniel Veillard05c13a22001-09-09 08:38:09 +00002831 /*
2832 * Don't spend time trying to differentiate them, the same callback is
2833 * used !
2834 */
2835 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002836 return(0);
2837
Owen Taylor3473f882001-02-23 17:55:21 +00002838 /*
2839 * Check for xml:space value.
2840 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002841 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2842 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002843 return(0);
2844
2845 /*
2846 * Check that the string is made of blanks
2847 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002848 if (blank_chars == 0) {
2849 for (i = 0;i < len;i++)
2850 if (!(IS_BLANK_CH(str[i]))) return(0);
2851 }
Owen Taylor3473f882001-02-23 17:55:21 +00002852
2853 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002854 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002855 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002856 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002857 if (ctxt->myDoc != NULL) {
2858 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2859 if (ret == 0) return(1);
2860 if (ret == 1) return(0);
2861 }
2862
2863 /*
2864 * Otherwise, heuristic :-\
2865 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002866 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002867 if ((ctxt->node->children == NULL) &&
2868 (RAW == '<') && (NXT(1) == '/')) return(0);
2869
2870 lastChild = xmlGetLastChild(ctxt->node);
2871 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002872 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2873 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002874 } else if (xmlNodeIsText(lastChild))
2875 return(0);
2876 else if ((ctxt->node->children != NULL) &&
2877 (xmlNodeIsText(ctxt->node->children)))
2878 return(0);
2879 return(1);
2880}
2881
Owen Taylor3473f882001-02-23 17:55:21 +00002882/************************************************************************
2883 * *
2884 * Extra stuff for namespace support *
2885 * Relates to http://www.w3.org/TR/WD-xml-names *
2886 * *
2887 ************************************************************************/
2888
2889/**
2890 * xmlSplitQName:
2891 * @ctxt: an XML parser context
2892 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002893 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002894 *
2895 * parse an UTF8 encoded XML qualified name string
2896 *
2897 * [NS 5] QName ::= (Prefix ':')? LocalPart
2898 *
2899 * [NS 6] Prefix ::= NCName
2900 *
2901 * [NS 7] LocalPart ::= NCName
2902 *
2903 * Returns the local part, and prefix is updated
2904 * to get the Prefix if any.
2905 */
2906
2907xmlChar *
2908xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2909 xmlChar buf[XML_MAX_NAMELEN + 5];
2910 xmlChar *buffer = NULL;
2911 int len = 0;
2912 int max = XML_MAX_NAMELEN;
2913 xmlChar *ret = NULL;
2914 const xmlChar *cur = name;
2915 int c;
2916
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002917 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002918 *prefix = NULL;
2919
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002920 if (cur == NULL) return(NULL);
2921
Owen Taylor3473f882001-02-23 17:55:21 +00002922#ifndef XML_XML_NAMESPACE
2923 /* xml: prefix is not really a namespace */
2924 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2925 (cur[2] == 'l') && (cur[3] == ':'))
2926 return(xmlStrdup(name));
2927#endif
2928
Daniel Veillard597bc482003-07-24 16:08:28 +00002929 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002930 if (cur[0] == ':')
2931 return(xmlStrdup(name));
2932
2933 c = *cur++;
2934 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2935 buf[len++] = c;
2936 c = *cur++;
2937 }
2938 if (len >= max) {
2939 /*
2940 * Okay someone managed to make a huge name, so he's ready to pay
2941 * for the processing speed.
2942 */
2943 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002944
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002945 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002946 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002947 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002948 return(NULL);
2949 }
2950 memcpy(buffer, buf, len);
2951 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2952 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002953 xmlChar *tmp;
2954
Owen Taylor3473f882001-02-23 17:55:21 +00002955 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002956 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002957 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002958 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002959 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002960 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002961 return(NULL);
2962 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002963 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002964 }
2965 buffer[len++] = c;
2966 c = *cur++;
2967 }
2968 buffer[len] = 0;
2969 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002970
Daniel Veillard597bc482003-07-24 16:08:28 +00002971 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002972 if (buffer != NULL)
2973 xmlFree(buffer);
2974 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002975 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002976 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002977
Owen Taylor3473f882001-02-23 17:55:21 +00002978 if (buffer == NULL)
2979 ret = xmlStrndup(buf, len);
2980 else {
2981 ret = buffer;
2982 buffer = NULL;
2983 max = XML_MAX_NAMELEN;
2984 }
2985
2986
2987 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002988 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002989 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002990 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002991 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002992 }
Owen Taylor3473f882001-02-23 17:55:21 +00002993 len = 0;
2994
Daniel Veillardbb284f42002-10-16 18:02:47 +00002995 /*
2996 * Check that the first character is proper to start
2997 * a new name
2998 */
2999 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3000 ((c >= 0x41) && (c <= 0x5A)) ||
3001 (c == '_') || (c == ':'))) {
3002 int l;
3003 int first = CUR_SCHAR(cur, l);
3004
3005 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003006 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003007 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003008 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003009 }
3010 }
3011 cur++;
3012
Owen Taylor3473f882001-02-23 17:55:21 +00003013 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3014 buf[len++] = c;
3015 c = *cur++;
3016 }
3017 if (len >= max) {
3018 /*
3019 * Okay someone managed to make a huge name, so he's ready to pay
3020 * for the processing speed.
3021 */
3022 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003023
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003024 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003025 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003026 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003027 return(NULL);
3028 }
3029 memcpy(buffer, buf, len);
3030 while (c != 0) { /* tested bigname2.xml */
3031 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003032 xmlChar *tmp;
3033
Owen Taylor3473f882001-02-23 17:55:21 +00003034 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003035 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003036 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003037 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003038 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003039 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003040 return(NULL);
3041 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003042 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003043 }
3044 buffer[len++] = c;
3045 c = *cur++;
3046 }
3047 buffer[len] = 0;
3048 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003049
Owen Taylor3473f882001-02-23 17:55:21 +00003050 if (buffer == NULL)
3051 ret = xmlStrndup(buf, len);
3052 else {
3053 ret = buffer;
3054 }
3055 }
3056
3057 return(ret);
3058}
3059
3060/************************************************************************
3061 * *
3062 * The parser itself *
3063 * Relates to http://www.w3.org/TR/REC-xml *
3064 * *
3065 ************************************************************************/
3066
Daniel Veillard34e3f642008-07-29 09:02:27 +00003067/************************************************************************
3068 * *
3069 * Routines to parse Name, NCName and NmToken *
3070 * *
3071 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, one per name parsing routine. Each routine bumps its
 * own counter on entry; they only exist in DEBUG builds.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3080
Daniel Veillard34e3f642008-07-29 09:02:27 +00003081/*
3082 * The two following functions are related to the change of accepted
3083 * characters for Name and NmToken in the Revision 5 of XML-1.0
3084 * They correspond to the modified production [4] and the new production [4a]
3085 * changes in that revision. Also note that the macros used for the
3086 * productions Letter, Digit, CombiningChar and Extender are not needed
3087 * anymore.
3088 * We still keep compatibility to pre-revision5 parsing semantic if the
3089 * new XML_PARSE_OLD10 option is given to the parser.
3090 */
3091static int
3092xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3093 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3094 /*
3095 * Use the new checks of production [4] [4a] amd [5] of the
3096 * Update 5 of XML-1.0
3097 */
3098 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3099 (((c >= 'a') && (c <= 'z')) ||
3100 ((c >= 'A') && (c <= 'Z')) ||
3101 (c == '_') || (c == ':') ||
3102 ((c >= 0xC0) && (c <= 0xD6)) ||
3103 ((c >= 0xD8) && (c <= 0xF6)) ||
3104 ((c >= 0xF8) && (c <= 0x2FF)) ||
3105 ((c >= 0x370) && (c <= 0x37D)) ||
3106 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3107 ((c >= 0x200C) && (c <= 0x200D)) ||
3108 ((c >= 0x2070) && (c <= 0x218F)) ||
3109 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3110 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3111 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3112 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3113 ((c >= 0x10000) && (c <= 0xEFFFF))))
3114 return(1);
3115 } else {
3116 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3117 return(1);
3118 }
3119 return(0);
3120}
3121
3122static int
3123xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3124 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3125 /*
3126 * Use the new checks of production [4] [4a] amd [5] of the
3127 * Update 5 of XML-1.0
3128 */
3129 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3130 (((c >= 'a') && (c <= 'z')) ||
3131 ((c >= 'A') && (c <= 'Z')) ||
3132 ((c >= '0') && (c <= '9')) || /* !start */
3133 (c == '_') || (c == ':') ||
3134 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3135 ((c >= 0xC0) && (c <= 0xD6)) ||
3136 ((c >= 0xD8) && (c <= 0xF6)) ||
3137 ((c >= 0xF8) && (c <= 0x2FF)) ||
3138 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3139 ((c >= 0x370) && (c <= 0x37D)) ||
3140 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3141 ((c >= 0x200C) && (c <= 0x200D)) ||
3142 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3143 ((c >= 0x2070) && (c <= 0x218F)) ||
3144 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3145 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3146 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3147 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3148 ((c >= 0x10000) && (c <= 0xEFFFF))))
3149 return(1);
3150 } else {
3151 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3152 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003153 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003154 (IS_COMBINING(c)) ||
3155 (IS_EXTENDER(c)))
3156 return(1);
3157 }
3158 return(0);
3159}
3160
Daniel Veillarde57ec792003-09-10 10:50:59 +00003161static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003162 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003163
Daniel Veillard34e3f642008-07-29 09:02:27 +00003164static const xmlChar *
3165xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3166 int len = 0, l;
3167 int c;
3168 int count = 0;
3169
Daniel Veillardc6561462009-03-25 10:22:31 +00003170#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003171 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003172#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003173
3174 /*
3175 * Handler for more complex cases
3176 */
3177 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003178 if (ctxt->instate == XML_PARSER_EOF)
3179 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003180 c = CUR_CHAR(l);
3181 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3182 /*
3183 * Use the new checks of production [4] [4a] amd [5] of the
3184 * Update 5 of XML-1.0
3185 */
3186 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3187 (!(((c >= 'a') && (c <= 'z')) ||
3188 ((c >= 'A') && (c <= 'Z')) ||
3189 (c == '_') || (c == ':') ||
3190 ((c >= 0xC0) && (c <= 0xD6)) ||
3191 ((c >= 0xD8) && (c <= 0xF6)) ||
3192 ((c >= 0xF8) && (c <= 0x2FF)) ||
3193 ((c >= 0x370) && (c <= 0x37D)) ||
3194 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3195 ((c >= 0x200C) && (c <= 0x200D)) ||
3196 ((c >= 0x2070) && (c <= 0x218F)) ||
3197 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3198 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3199 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3200 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3201 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3202 return(NULL);
3203 }
3204 len += l;
3205 NEXTL(l);
3206 c = CUR_CHAR(l);
3207 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3208 (((c >= 'a') && (c <= 'z')) ||
3209 ((c >= 'A') && (c <= 'Z')) ||
3210 ((c >= '0') && (c <= '9')) || /* !start */
3211 (c == '_') || (c == ':') ||
3212 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3213 ((c >= 0xC0) && (c <= 0xD6)) ||
3214 ((c >= 0xD8) && (c <= 0xF6)) ||
3215 ((c >= 0xF8) && (c <= 0x2FF)) ||
3216 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3217 ((c >= 0x370) && (c <= 0x37D)) ||
3218 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3219 ((c >= 0x200C) && (c <= 0x200D)) ||
3220 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3221 ((c >= 0x2070) && (c <= 0x218F)) ||
3222 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3223 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3224 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3225 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3226 ((c >= 0x10000) && (c <= 0xEFFFF))
3227 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003228 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003229 count = 0;
3230 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003231 if (ctxt->instate == XML_PARSER_EOF)
3232 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003233 }
3234 len += l;
3235 NEXTL(l);
3236 c = CUR_CHAR(l);
3237 }
3238 } else {
3239 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3240 (!IS_LETTER(c) && (c != '_') &&
3241 (c != ':'))) {
3242 return(NULL);
3243 }
3244 len += l;
3245 NEXTL(l);
3246 c = CUR_CHAR(l);
3247
3248 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3249 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3250 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003251 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003252 (IS_COMBINING(c)) ||
3253 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003254 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003255 count = 0;
3256 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003257 if (ctxt->instate == XML_PARSER_EOF)
3258 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003259 }
3260 len += l;
3261 NEXTL(l);
3262 c = CUR_CHAR(l);
3263 }
3264 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003265 if ((len > XML_MAX_NAME_LENGTH) &&
3266 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3267 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3268 return(NULL);
3269 }
Nick Wellnhofere2663052017-06-05 15:37:17 +02003270 if (ctxt->input->cur - ctxt->input->base < len) {
3271 /*
3272 * There were a couple of bugs where PERefs lead to to a change
3273 * of the buffer. Check the buffer size to avoid passing an invalid
3274 * pointer to xmlDictLookup.
3275 */
3276 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3277 "unexpected change of input buffer");
3278 return (NULL);
3279 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003280 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3281 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3282 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3283}
3284
Owen Taylor3473f882001-02-23 17:55:21 +00003285/**
3286 * xmlParseName:
3287 * @ctxt: an XML parser context
3288 *
3289 * parse an XML name.
3290 *
3291 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3292 * CombiningChar | Extender
3293 *
3294 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3295 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003296 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003297 *
3298 * Returns the Name parsed or NULL
3299 */
3300
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003301const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003302xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003303 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003304 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003305 int count = 0;
3306
3307 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003308
Daniel Veillardc6561462009-03-25 10:22:31 +00003309#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003310 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003311#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003312
Daniel Veillard48b2f892001-02-25 16:11:03 +00003313 /*
3314 * Accelerator for simple ASCII names
3315 */
3316 in = ctxt->input->cur;
3317 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3318 ((*in >= 0x41) && (*in <= 0x5A)) ||
3319 (*in == '_') || (*in == ':')) {
3320 in++;
3321 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3322 ((*in >= 0x41) && (*in <= 0x5A)) ||
3323 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003324 (*in == '_') || (*in == '-') ||
3325 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003326 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003327 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003328 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003329 if ((count > XML_MAX_NAME_LENGTH) &&
3330 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3331 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332 return(NULL);
3333 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003334 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003335 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003336 ctxt->nbChars += count;
3337 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003338 if (ret == NULL)
3339 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003340 return(ret);
3341 }
3342 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003343 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003344 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003345}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003346
Daniel Veillard34e3f642008-07-29 09:02:27 +00003347static const xmlChar *
3348xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3349 int len = 0, l;
3350 int c;
3351 int count = 0;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003352 size_t startPosition = 0;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003353
Daniel Veillardc6561462009-03-25 10:22:31 +00003354#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003355 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003356#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003357
3358 /*
3359 * Handler for more complex cases
3360 */
3361 GROW;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003362 startPosition = CUR_PTR - BASE_PTR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003363 c = CUR_CHAR(l);
3364 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3365 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3366 return(NULL);
3367 }
3368
3369 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3370 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003371 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003372 if ((len > XML_MAX_NAME_LENGTH) &&
3373 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3374 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3375 return(NULL);
3376 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003377 count = 0;
3378 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003379 if (ctxt->instate == XML_PARSER_EOF)
3380 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003381 }
3382 len += l;
3383 NEXTL(l);
3384 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003385 if (c == 0) {
3386 count = 0;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003387 /*
3388 * when shrinking to extend the buffer we really need to preserve
3389 * the part of the name we already parsed. Hence rolling back
Haibo Huangcfd91dc2020-07-30 23:01:33 -07003390 * by current length.
Daniel Veillard51f02b02015-09-15 16:50:32 +08003391 */
3392 ctxt->input->cur -= l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003393 GROW;
3394 if (ctxt->instate == XML_PARSER_EOF)
3395 return(NULL);
Nick Wellnhofer132af1a2018-01-08 18:48:01 +01003396 ctxt->input->cur += l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003397 c = CUR_CHAR(l);
3398 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003399 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003400 if ((len > XML_MAX_NAME_LENGTH) &&
3401 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3402 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3403 return(NULL);
3404 }
Pranjal Jumde45752d22016-03-03 11:50:34 -08003405 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003406}
3407
3408/**
3409 * xmlParseNCName:
3410 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003411 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003412 *
3413 * parse an XML name.
3414 *
3415 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3416 * CombiningChar | Extender
3417 *
3418 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3419 *
3420 * Returns the Name parsed or NULL
3421 */
3422
3423static const xmlChar *
3424xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003425 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003426 const xmlChar *ret;
3427 int count = 0;
3428
Daniel Veillardc6561462009-03-25 10:22:31 +00003429#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003430 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003431#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003432
3433 /*
3434 * Accelerator for simple ASCII names
3435 */
3436 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003437 e = ctxt->input->end;
3438 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3439 ((*in >= 0x41) && (*in <= 0x5A)) ||
3440 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003441 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003442 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3443 ((*in >= 0x41) && (*in <= 0x5A)) ||
3444 ((*in >= 0x30) && (*in <= 0x39)) ||
3445 (*in == '_') || (*in == '-') ||
3446 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003447 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003448 if (in >= e)
3449 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003450 if ((*in > 0) && (*in < 0x80)) {
3451 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003452 if ((count > XML_MAX_NAME_LENGTH) &&
3453 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3454 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3455 return(NULL);
3456 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003457 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3458 ctxt->input->cur = in;
3459 ctxt->nbChars += count;
3460 ctxt->input->col += count;
3461 if (ret == NULL) {
3462 xmlErrMemory(ctxt, NULL);
3463 }
3464 return(ret);
3465 }
3466 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003467complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003468 return(xmlParseNCNameComplex(ctxt));
3469}
3470
Daniel Veillard46de64e2002-05-29 08:21:33 +00003471/**
3472 * xmlParseNameAndCompare:
3473 * @ctxt: an XML parser context
3474 *
3475 * parse an XML name and compares for match
3476 * (specialized for endtag parsing)
3477 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003478 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3479 * and the name for mismatch
3480 */
3481
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003482static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003483xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003484 register const xmlChar *cmp = other;
3485 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003486 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003487
3488 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003489 if (ctxt->instate == XML_PARSER_EOF)
3490 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003491
Daniel Veillard46de64e2002-05-29 08:21:33 +00003492 in = ctxt->input->cur;
3493 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003494 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003495 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003496 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003497 }
William M. Brack76e95df2003-10-18 16:20:14 +00003498 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003499 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003500 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003501 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003502 }
3503 /* failure (or end of input buffer), check with full function */
3504 ret = xmlParseName (ctxt);
Jan Pokornýbb654fe2016-04-13 16:56:07 +02003505 /* strings coming from the dictionary direct compare possible */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003506 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003507 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003508 }
3509 return ret;
3510}
3511
Owen Taylor3473f882001-02-23 17:55:21 +00003512/**
3513 * xmlParseStringName:
3514 * @ctxt: an XML parser context
3515 * @str: a pointer to the string pointer (IN/OUT)
3516 *
3517 * parse an XML name.
3518 *
3519 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3520 * CombiningChar | Extender
3521 *
3522 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3523 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003524 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003525 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003526 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003527 * is updated to the current location in the string.
3528 */
3529
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003530static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003531xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3532 xmlChar buf[XML_MAX_NAMELEN + 5];
3533 const xmlChar *cur = *str;
3534 int len = 0, l;
3535 int c;
3536
Daniel Veillardc6561462009-03-25 10:22:31 +00003537#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003538 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003539#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003540
Owen Taylor3473f882001-02-23 17:55:21 +00003541 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003542 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003543 return(NULL);
3544 }
3545
Daniel Veillard34e3f642008-07-29 09:02:27 +00003546 COPY_BUF(l,buf,len,c);
3547 cur += l;
3548 c = CUR_SCHAR(cur, l);
3549 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003550 COPY_BUF(l,buf,len,c);
3551 cur += l;
3552 c = CUR_SCHAR(cur, l);
3553 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3554 /*
3555 * Okay someone managed to make a huge name, so he's ready to pay
3556 * for the processing speed.
3557 */
3558 xmlChar *buffer;
3559 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003560
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003561 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003562 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003563 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003564 return(NULL);
3565 }
3566 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003567 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003568 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003569 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003570
3571 if ((len > XML_MAX_NAME_LENGTH) &&
3572 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3573 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3574 xmlFree(buffer);
3575 return(NULL);
3576 }
Owen Taylor3473f882001-02-23 17:55:21 +00003577 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003578 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003579 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003580 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003581 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003582 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003583 return(NULL);
3584 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003585 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003586 }
3587 COPY_BUF(l,buffer,len,c);
3588 cur += l;
3589 c = CUR_SCHAR(cur, l);
3590 }
3591 buffer[len] = 0;
3592 *str = cur;
3593 return(buffer);
3594 }
3595 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003596 if ((len > XML_MAX_NAME_LENGTH) &&
3597 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3598 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3599 return(NULL);
3600 }
Owen Taylor3473f882001-02-23 17:55:21 +00003601 *str = cur;
3602 return(xmlStrndup(buf, len));
3603}
3604
3605/**
3606 * xmlParseNmtoken:
3607 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003608 *
Owen Taylor3473f882001-02-23 17:55:21 +00003609 * parse an XML Nmtoken.
3610 *
3611 * [7] Nmtoken ::= (NameChar)+
3612 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003613 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003614 *
3615 * Returns the Nmtoken parsed or NULL
3616 */
3617
3618xmlChar *
3619xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3620 xmlChar buf[XML_MAX_NAMELEN + 5];
3621 int len = 0, l;
3622 int c;
3623 int count = 0;
3624
Daniel Veillardc6561462009-03-25 10:22:31 +00003625#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003626 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003627#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003628
Owen Taylor3473f882001-02-23 17:55:21 +00003629 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003630 if (ctxt->instate == XML_PARSER_EOF)
3631 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003632 c = CUR_CHAR(l);
3633
Daniel Veillard34e3f642008-07-29 09:02:27 +00003634 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003635 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003636 count = 0;
3637 GROW;
3638 }
3639 COPY_BUF(l,buf,len,c);
3640 NEXTL(l);
3641 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003642 if (c == 0) {
3643 count = 0;
3644 GROW;
3645 if (ctxt->instate == XML_PARSER_EOF)
3646 return(NULL);
3647 c = CUR_CHAR(l);
3648 }
Owen Taylor3473f882001-02-23 17:55:21 +00003649 if (len >= XML_MAX_NAMELEN) {
3650 /*
3651 * Okay someone managed to make a huge token, so he's ready to pay
3652 * for the processing speed.
3653 */
3654 xmlChar *buffer;
3655 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003656
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003657 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003658 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003659 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003660 return(NULL);
3661 }
3662 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003663 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003664 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003665 count = 0;
3666 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003667 if (ctxt->instate == XML_PARSER_EOF) {
3668 xmlFree(buffer);
3669 return(NULL);
3670 }
Owen Taylor3473f882001-02-23 17:55:21 +00003671 }
3672 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003673 xmlChar *tmp;
3674
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003675 if ((max > XML_MAX_NAME_LENGTH) &&
3676 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3677 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3678 xmlFree(buffer);
3679 return(NULL);
3680 }
Owen Taylor3473f882001-02-23 17:55:21 +00003681 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003682 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003683 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003684 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003685 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003686 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003687 return(NULL);
3688 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003689 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003690 }
3691 COPY_BUF(l,buffer,len,c);
3692 NEXTL(l);
3693 c = CUR_CHAR(l);
3694 }
3695 buffer[len] = 0;
3696 return(buffer);
3697 }
3698 }
3699 if (len == 0)
3700 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003701 if ((len > XML_MAX_NAME_LENGTH) &&
3702 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3703 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3704 return(NULL);
3705 }
Owen Taylor3473f882001-02-23 17:55:21 +00003706 return(xmlStrndup(buf, len));
3707}
3708
3709/**
3710 * xmlParseEntityValue:
3711 * @ctxt: an XML parser context
3712 * @orig: if non-NULL store a copy of the original entity value
3713 *
3714 * parse a value for ENTITY declarations
3715 *
3716 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3717 * "'" ([^%&'] | PEReference | Reference)* "'"
3718 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003719 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003720 */
3721
3722xmlChar *
3723xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3724 xmlChar *buf = NULL;
3725 int len = 0;
3726 int size = XML_PARSER_BUFFER_SIZE;
3727 int c, l;
3728 xmlChar stop;
3729 xmlChar *ret = NULL;
3730 const xmlChar *cur = NULL;
3731 xmlParserInputPtr input;
3732
3733 if (RAW == '"') stop = '"';
3734 else if (RAW == '\'') stop = '\'';
3735 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003736 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003737 return(NULL);
3738 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003739 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003740 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003741 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003742 return(NULL);
3743 }
3744
3745 /*
3746 * The content of the entity definition is copied in a buffer.
3747 */
3748
3749 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3750 input = ctxt->input;
3751 GROW;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003752 if (ctxt->instate == XML_PARSER_EOF)
3753 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003754 NEXT;
3755 c = CUR_CHAR(l);
3756 /*
3757 * NOTE: 4.4.5 Included in Literal
3758 * When a parameter entity reference appears in a literal entity
3759 * value, ... a single or double quote character in the replacement
3760 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003761 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003762 * In practice it means we stop the loop only when back at parsing
3763 * the initial entity and the quote is found
3764 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003765 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3766 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003767 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003768 xmlChar *tmp;
3769
Owen Taylor3473f882001-02-23 17:55:21 +00003770 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003771 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3772 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003773 xmlErrMemory(ctxt, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003774 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003775 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003776 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003777 }
3778 COPY_BUF(l,buf,len,c);
3779 NEXTL(l);
Owen Taylor3473f882001-02-23 17:55:21 +00003780
3781 GROW;
3782 c = CUR_CHAR(l);
3783 if (c == 0) {
3784 GROW;
3785 c = CUR_CHAR(l);
3786 }
3787 }
3788 buf[len] = 0;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003789 if (ctxt->instate == XML_PARSER_EOF)
3790 goto error;
3791 if (c != stop) {
3792 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3793 goto error;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003794 }
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003795 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00003796
3797 /*
3798 * Raise problem w.r.t. '&' and '%' being used in non-entities
3799 * reference constructs. Note Charref will be handled in
3800 * xmlStringDecodeEntities()
3801 */
3802 cur = buf;
3803 while (*cur != 0) { /* non input consuming */
3804 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3805 xmlChar *name;
3806 xmlChar tmp = *cur;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003807 int nameOk = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003808
3809 cur++;
3810 name = xmlParseStringName(ctxt, &cur);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003811 if (name != NULL) {
3812 nameOk = 1;
3813 xmlFree(name);
3814 }
3815 if ((nameOk == 0) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003816 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003817 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003818 tmp);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003819 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003820 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003821 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3822 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003823 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003824 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003825 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003826 if (*cur == 0)
3827 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003828 }
3829 cur++;
3830 }
3831
3832 /*
3833 * Then PEReference entities are substituted.
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003834 *
3835 * NOTE: 4.4.7 Bypassed
3836 * When a general entity reference appears in the EntityValue in
3837 * an entity declaration, it is bypassed and left as is.
3838 * so XML_SUBSTITUTE_REF is not set here.
Owen Taylor3473f882001-02-23 17:55:21 +00003839 */
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003840 ++ctxt->depth;
3841 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3842 0, 0, 0);
3843 --ctxt->depth;
3844 if (orig != NULL) {
3845 *orig = buf;
3846 buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003847 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003848
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003849error:
3850 if (buf != NULL)
3851 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003852 return(ret);
3853}
3854
3855/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003856 * xmlParseAttValueComplex:
3857 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003858 * @len: the resulting attribute len
Haibo Huangcfd91dc2020-07-30 23:01:33 -07003859 * @normalize: whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003860 *
3861 * parse a value for an attribute, this is the fallback function
3862 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003863 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003864 *
3865 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3866 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003867static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003868xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003869 xmlChar limit = 0;
3870 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003871 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003872 size_t len = 0;
3873 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003874 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003875 xmlChar *current = NULL;
3876 xmlEntityPtr ent;
3877
Owen Taylor3473f882001-02-23 17:55:21 +00003878 if (NXT(0) == '"') {
3879 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3880 limit = '"';
3881 NEXT;
3882 } else if (NXT(0) == '\'') {
3883 limit = '\'';
3884 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3885 NEXT;
3886 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003887 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003888 return(NULL);
3889 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003890
Owen Taylor3473f882001-02-23 17:55:21 +00003891 /*
3892 * allocate a translation buffer.
3893 */
3894 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003895 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003896 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003897
3898 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003899 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003900 */
3901 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003902 while (((NXT(0) != limit) && /* checked */
3903 (IS_CHAR(c)) && (c != '<')) &&
3904 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003905 /*
3906 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3907 * special option is given
3908 */
3909 if ((len > XML_MAX_TEXT_LENGTH) &&
3910 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3911 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003912 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003913 goto mem_error;
3914 }
Daniel Veillardfdc91562002-07-01 21:52:03 +00003915 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003916 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003917 if (NXT(1) == '#') {
3918 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003919
Owen Taylor3473f882001-02-23 17:55:21 +00003920 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003921 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003922 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003923 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003924 }
3925 buf[len++] = '&';
3926 } else {
3927 /*
3928 * The reparsing will be done in xmlStringGetNodeList()
3929 * called by the attribute() function in SAX.c
3930 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003931 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003932 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003933 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003934 buf[len++] = '&';
3935 buf[len++] = '#';
3936 buf[len++] = '3';
3937 buf[len++] = '8';
3938 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003939 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003940 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003941 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003942 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003943 }
Owen Taylor3473f882001-02-23 17:55:21 +00003944 len += xmlCopyChar(0, &buf[len], val);
3945 }
3946 } else {
3947 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003948 ctxt->nbentities++;
3949 if (ent != NULL)
3950 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003951 if ((ent != NULL) &&
3952 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003953 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003954 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003955 }
3956 if ((ctxt->replaceEntities == 0) &&
3957 (ent->content[0] == '&')) {
3958 buf[len++] = '&';
3959 buf[len++] = '#';
3960 buf[len++] = '3';
3961 buf[len++] = '8';
3962 buf[len++] = ';';
3963 } else {
3964 buf[len++] = ent->content[0];
3965 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003966 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003967 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003968 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
Peter Simons8f30bdf2016-04-15 11:56:55 +02003969 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003970 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003971 XML_SUBSTITUTE_REF,
3972 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02003973 --ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003974 if (rep != NULL) {
3975 current = rep;
3976 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003977 if ((*current == 0xD) || (*current == 0xA) ||
3978 (*current == 0x9)) {
3979 buf[len++] = 0x20;
3980 current++;
3981 } else
3982 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003983 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003984 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003985 }
3986 }
3987 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003988 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003989 }
3990 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003991 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003992 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003993 }
Owen Taylor3473f882001-02-23 17:55:21 +00003994 if (ent->content != NULL)
3995 buf[len++] = ent->content[0];
3996 }
3997 } else if (ent != NULL) {
3998 int i = xmlStrlen(ent->name);
3999 const xmlChar *cur = ent->name;
4000
4001 /*
4002 * This may look absurd but is needed to detect
4003 * entities problems
4004 */
4005 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004006 (ent->content != NULL) && (ent->checked == 0)) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004007 unsigned long oldnbent = ctxt->nbentities, diff;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004008
Peter Simons8f30bdf2016-04-15 11:56:55 +02004009 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004010 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004011 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004012 --ctxt->depth;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004013
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004014 diff = ctxt->nbentities - oldnbent + 1;
4015 if (diff > INT_MAX / 2)
4016 diff = INT_MAX / 2;
4017 ent->checked = diff * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004018 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004019 if (xmlStrchr(rep, '<'))
4020 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004021 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004022 rep = NULL;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02004023 } else {
4024 ent->content[0] = 0;
4025 }
Owen Taylor3473f882001-02-23 17:55:21 +00004026 }
4027
4028 /*
4029 * Just output the reference
4030 */
4031 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004032 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004033 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004034 }
4035 for (;i > 0;i--)
4036 buf[len++] = *cur++;
4037 buf[len++] = ';';
4038 }
4039 }
4040 } else {
4041 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004042 if ((len != 0) || (!normalize)) {
4043 if ((!normalize) || (!in_space)) {
4044 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004045 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004046 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004047 }
4048 }
4049 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004050 }
4051 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004052 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004053 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004054 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004055 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004056 }
4057 }
4058 NEXTL(l);
4059 }
4060 GROW;
4061 c = CUR_CHAR(l);
4062 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004063 if (ctxt->instate == XML_PARSER_EOF)
4064 goto error;
4065
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004066 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004067 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004068 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004069 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004070 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004071 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004072 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004073 if ((c != 0) && (!IS_CHAR(c))) {
4074 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4075 "invalid character in attribute value\n");
4076 } else {
4077 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4078 "AttValue: ' expected\n");
4079 }
Owen Taylor3473f882001-02-23 17:55:21 +00004080 } else
4081 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004082
4083 /*
4084 * There we potentially risk an overflow, don't allow attribute value of
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004085 * length more than INT_MAX it is a very reasonable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004086 */
4087 if (len >= INT_MAX) {
4088 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004089 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004090 goto mem_error;
4091 }
4092
4093 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004094 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004095
4096mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004097 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004098error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004099 if (buf != NULL)
4100 xmlFree(buf);
4101 if (rep != NULL)
4102 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004103 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004104}
4105
4106/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004107 * xmlParseAttValue:
4108 * @ctxt: an XML parser context
4109 *
4110 * parse a value for an attribute
4111 * Note: the parser won't do substitution of entities here, this
4112 * will be handled later in xmlStringGetNodeList
4113 *
4114 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4115 * "'" ([^<&'] | Reference)* "'"
4116 *
4117 * 3.3.3 Attribute-Value Normalization:
4118 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004119 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004120 * - a character reference is processed by appending the referenced
4121 * character to the attribute value
4122 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004123 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004124 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4125 * appending #x20 to the normalized value, except that only a single
4126 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004127 * parsed entity or the literal entity value of an internal parsed entity
4128 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004129 * If the declared value is not CDATA, then the XML processor must further
4130 * process the normalized attribute value by discarding any leading and
4131 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004132 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004133 * All attributes for which no declaration has been read should be treated
4134 * by a non-validating parser as if declared CDATA.
4135 *
4136 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4137 */
4138
4139
4140xmlChar *
4141xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004142 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004143 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004144}
4145
4146/**
Owen Taylor3473f882001-02-23 17:55:21 +00004147 * xmlParseSystemLiteral:
4148 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004149 *
Owen Taylor3473f882001-02-23 17:55:21 +00004150 * parse an XML Literal
4151 *
4152 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4153 *
4154 * Returns the SystemLiteral parsed or NULL
4155 */
4156
4157xmlChar *
4158xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4159 xmlChar *buf = NULL;
4160 int len = 0;
4161 int size = XML_PARSER_BUFFER_SIZE;
4162 int cur, l;
4163 xmlChar stop;
4164 int state = ctxt->instate;
4165 int count = 0;
4166
4167 SHRINK;
4168 if (RAW == '"') {
4169 NEXT;
4170 stop = '"';
4171 } else if (RAW == '\'') {
4172 NEXT;
4173 stop = '\'';
4174 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004175 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004176 return(NULL);
4177 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004178
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004179 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004180 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004181 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004182 return(NULL);
4183 }
4184 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4185 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004186 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004187 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004188 xmlChar *tmp;
4189
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004190 if ((size > XML_MAX_NAME_LENGTH) &&
4191 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4192 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4193 xmlFree(buf);
4194 ctxt->instate = (xmlParserInputState) state;
4195 return(NULL);
4196 }
Owen Taylor3473f882001-02-23 17:55:21 +00004197 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004198 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4199 if (tmp == NULL) {
4200 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004201 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004202 ctxt->instate = (xmlParserInputState) state;
4203 return(NULL);
4204 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004205 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004206 }
4207 count++;
4208 if (count > 50) {
4209 GROW;
4210 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004211 if (ctxt->instate == XML_PARSER_EOF) {
4212 xmlFree(buf);
4213 return(NULL);
4214 }
Owen Taylor3473f882001-02-23 17:55:21 +00004215 }
4216 COPY_BUF(l,buf,len,cur);
4217 NEXTL(l);
4218 cur = CUR_CHAR(l);
4219 if (cur == 0) {
4220 GROW;
4221 SHRINK;
4222 cur = CUR_CHAR(l);
4223 }
4224 }
4225 buf[len] = 0;
4226 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004227 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004228 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004229 } else {
4230 NEXT;
4231 }
4232 return(buf);
4233}
4234
4235/**
4236 * xmlParsePubidLiteral:
4237 * @ctxt: an XML parser context
4238 *
4239 * parse an XML public literal
4240 *
4241 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4242 *
4243 * Returns the PubidLiteral parsed or NULL.
4244 */
4245
4246xmlChar *
4247xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4248 xmlChar *buf = NULL;
4249 int len = 0;
4250 int size = XML_PARSER_BUFFER_SIZE;
4251 xmlChar cur;
4252 xmlChar stop;
4253 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004254 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004255
4256 SHRINK;
4257 if (RAW == '"') {
4258 NEXT;
4259 stop = '"';
4260 } else if (RAW == '\'') {
4261 NEXT;
4262 stop = '\'';
4263 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004264 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004265 return(NULL);
4266 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004267 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004268 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004269 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004270 return(NULL);
4271 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004272 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004273 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004274 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004275 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004276 xmlChar *tmp;
4277
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004278 if ((size > XML_MAX_NAME_LENGTH) &&
4279 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4280 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4281 xmlFree(buf);
4282 return(NULL);
4283 }
Owen Taylor3473f882001-02-23 17:55:21 +00004284 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004285 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4286 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004287 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004288 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004289 return(NULL);
4290 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004291 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004292 }
4293 buf[len++] = cur;
4294 count++;
4295 if (count > 50) {
4296 GROW;
4297 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004298 if (ctxt->instate == XML_PARSER_EOF) {
4299 xmlFree(buf);
4300 return(NULL);
4301 }
Owen Taylor3473f882001-02-23 17:55:21 +00004302 }
4303 NEXT;
4304 cur = CUR;
4305 if (cur == 0) {
4306 GROW;
4307 SHRINK;
4308 cur = CUR;
4309 }
4310 }
4311 buf[len] = 0;
4312 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004313 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004314 } else {
4315 NEXT;
4316 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004317 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004318 return(buf);
4319}
4320
Daniel Veillard8ed10722009-08-20 19:17:36 +02004321static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004322
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value: an entry equal to its own index marks a byte
 * listed here as plain, an entry of 0x00 marks every other byte
 * (controls other than 0x9, '&', '<', ']' and all bytes >= 0x80).
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4360
Owen Taylor3473f882001-02-23 17:55:21 +00004361/**
4362 * xmlParseCharData:
4363 * @ctxt: an XML parser context
4364 * @cdata: int indicating whether we are within a CDATA section
4365 *
4366 * parse a CharData section.
4367 * if we are within a CDATA section ']]>' marks an end of section.
4368 *
4369 * The right angle bracket (>) may be represented using the string "&gt;",
4370 * and must, for compatibility, be escaped using "&gt;" or a character
4371 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004372 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004373 *
4374 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4375 */
4376
4377void
4378xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004379 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004380 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004381 int line = ctxt->input->line;
4382 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004383 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004384
4385 SHRINK;
4386 GROW;
4387 /*
4388 * Accelerated common case where input don't need to be
4389 * modified before passing it to the handler.
4390 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004391 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004392 in = ctxt->input->cur;
4393 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004394get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004395 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004396 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004397 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004398 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004399 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004400 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004401 goto get_more_space;
4402 }
4403 if (*in == '<') {
4404 nbchar = in - ctxt->input->cur;
4405 if (nbchar > 0) {
4406 const xmlChar *tmp = ctxt->input->cur;
4407 ctxt->input->cur = in;
4408
Daniel Veillard34099b42004-11-04 17:34:35 +00004409 if ((ctxt->sax != NULL) &&
4410 (ctxt->sax->ignorableWhitespace !=
4411 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004412 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004413 if (ctxt->sax->ignorableWhitespace != NULL)
4414 ctxt->sax->ignorableWhitespace(ctxt->userData,
4415 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004416 } else {
4417 if (ctxt->sax->characters != NULL)
4418 ctxt->sax->characters(ctxt->userData,
4419 tmp, nbchar);
4420 if (*ctxt->space == -1)
4421 *ctxt->space = -2;
4422 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004423 } else if ((ctxt->sax != NULL) &&
4424 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004425 ctxt->sax->characters(ctxt->userData,
4426 tmp, nbchar);
4427 }
4428 }
4429 return;
4430 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004431
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004432get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004433 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004434 while (test_char_data[*in]) {
4435 in++;
4436 ccol++;
4437 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004438 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004439 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004440 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004441 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004442 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004443 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004444 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004445 }
4446 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004447 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004448 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Nick Wellnhofer52ceced2017-07-01 17:49:30 +02004449 ctxt->input->cur = in + 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004450 return;
4451 }
4452 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004453 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004454 goto get_more;
4455 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004456 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004457 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004458 if ((ctxt->sax != NULL) &&
4459 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004460 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004461 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004462 const xmlChar *tmp = ctxt->input->cur;
4463 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004464
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004465 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004466 if (ctxt->sax->ignorableWhitespace != NULL)
4467 ctxt->sax->ignorableWhitespace(ctxt->userData,
4468 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004469 } else {
4470 if (ctxt->sax->characters != NULL)
4471 ctxt->sax->characters(ctxt->userData,
4472 tmp, nbchar);
4473 if (*ctxt->space == -1)
4474 *ctxt->space = -2;
4475 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004476 line = ctxt->input->line;
4477 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004478 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004479 if (ctxt->sax->characters != NULL)
4480 ctxt->sax->characters(ctxt->userData,
4481 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004482 line = ctxt->input->line;
4483 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004484 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004485 /* something really bad happened in the SAX callback */
4486 if (ctxt->instate != XML_PARSER_CONTENT)
4487 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004488 }
4489 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004490 if (*in == 0xD) {
4491 in++;
4492 if (*in == 0xA) {
4493 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004494 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004495 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004496 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004497 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004498 in--;
4499 }
4500 if (*in == '<') {
4501 return;
4502 }
4503 if (*in == '&') {
4504 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004505 }
4506 SHRINK;
4507 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004508 if (ctxt->instate == XML_PARSER_EOF)
4509 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004510 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004511 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004512 nbchar = 0;
4513 }
Daniel Veillard50582112001-03-26 22:52:16 +00004514 ctxt->input->line = line;
4515 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004516 xmlParseCharDataComplex(ctxt, cdata);
4517}
4518
Daniel Veillard01c13b52002-12-10 15:19:08 +00004519/**
4520 * xmlParseCharDataComplex:
4521 * @ctxt: an XML parser context
4522 * @cdata: int indicating whether we are within a CDATA section
4523 *
4524 * parse a CharData section.this is the fallback function
4525 * of xmlParseCharData() when the parsing requires handling
4526 * of non-ASCII characters.
4527 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004528static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004529xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004530 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4531 int nbchar = 0;
4532 int cur, l;
4533 int count = 0;
4534
4535 SHRINK;
4536 GROW;
4537 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004538 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004539 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004540 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004541 if ((cur == ']') && (NXT(1) == ']') &&
4542 (NXT(2) == '>')) {
4543 if (cdata) break;
4544 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004545 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004546 }
4547 }
4548 COPY_BUF(l,buf,nbchar,cur);
4549 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004550 buf[nbchar] = 0;
4551
Owen Taylor3473f882001-02-23 17:55:21 +00004552 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004553 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004554 */
4555 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004556 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004557 if (ctxt->sax->ignorableWhitespace != NULL)
4558 ctxt->sax->ignorableWhitespace(ctxt->userData,
4559 buf, nbchar);
4560 } else {
4561 if (ctxt->sax->characters != NULL)
4562 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004563 if ((ctxt->sax->characters !=
4564 ctxt->sax->ignorableWhitespace) &&
4565 (*ctxt->space == -1))
4566 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004567 }
4568 }
4569 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004570 /* something really bad happened in the SAX callback */
4571 if (ctxt->instate != XML_PARSER_CONTENT)
4572 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004573 }
4574 count++;
4575 if (count > 50) {
4576 GROW;
4577 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004578 if (ctxt->instate == XML_PARSER_EOF)
4579 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004580 }
4581 NEXTL(l);
4582 cur = CUR_CHAR(l);
4583 }
4584 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004585 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004586 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004587 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004588 */
4589 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004590 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004591 if (ctxt->sax->ignorableWhitespace != NULL)
4592 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4593 } else {
4594 if (ctxt->sax->characters != NULL)
4595 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004596 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4597 (*ctxt->space == -1))
4598 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004599 }
4600 }
4601 }
Nick Wellnhofer69936b12017-08-30 14:16:01 +02004602 if ((cur != 0) && (!IS_CHAR(cur))) {
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004603 /* Generate the error and skip the offending character */
4604 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4605 "PCDATA invalid Char value %d\n",
4606 cur);
4607 NEXTL(l);
4608 }
Owen Taylor3473f882001-02-23 17:55:21 +00004609}
4610
4611/**
4612 * xmlParseExternalID:
4613 * @ctxt: an XML parser context
4614 * @publicID: a xmlChar** receiving PubidLiteral
4615 * @strict: indicate whether we should restrict parsing to only
4616 * production [75], see NOTE below
4617 *
4618 * Parse an External ID or a Public ID
4619 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004620 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004621 * 'PUBLIC' S PubidLiteral S SystemLiteral
4622 *
4623 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4624 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4625 *
4626 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4627 *
4628 * Returns the function returns SystemLiteral and in the second
4629 * case publicID receives PubidLiteral, is strict is off
4630 * it is possible to return NULL and have publicID set.
4631 */
4632
4633xmlChar *
4634xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4635 xmlChar *URI = NULL;
4636
4637 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004638
4639 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004640 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004641 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004642 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004643 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4644 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004645 }
Owen Taylor3473f882001-02-23 17:55:21 +00004646 URI = xmlParseSystemLiteral(ctxt);
4647 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004648 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004649 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004650 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004651 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004652 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004653 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004654 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004655 }
Owen Taylor3473f882001-02-23 17:55:21 +00004656 *publicID = xmlParsePubidLiteral(ctxt);
4657 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004658 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004659 }
4660 if (strict) {
4661 /*
4662 * We don't handle [83] so "S SystemLiteral" is required.
4663 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004664 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004665 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004666 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004667 }
4668 } else {
4669 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004670 * We handle [83] so we return immediately, if
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004671 * "S SystemLiteral" is not detected. We skip blanks if no
4672 * system literal was found, but this is harmless since we must
4673 * be at the end of a NotationDecl.
Owen Taylor3473f882001-02-23 17:55:21 +00004674 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004675 if (SKIP_BLANKS == 0) return(NULL);
4676 if ((CUR != '\'') && (CUR != '"')) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004677 }
Owen Taylor3473f882001-02-23 17:55:21 +00004678 URI = xmlParseSystemLiteral(ctxt);
4679 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004680 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004681 }
4682 }
4683 return(URI);
4684}
4685
4686/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004687 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004688 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004689 * @buf: the already parsed part of the buffer
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004690 * @len: number of bytes in the buffer
Daniel Veillard4c778d82005-01-23 17:37:44 +00004691 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004692 *
4693 * Skip an XML (SGML) comment <!-- .... -->
4694 * The spec says that "For compatibility, the string "--" (double-hyphen)
4695 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004696 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004697 *
4698 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4699 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004700static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004701xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4702 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004703 int q, ql;
4704 int r, rl;
4705 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004706 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004707 int inputid;
4708
4709 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004710
Owen Taylor3473f882001-02-23 17:55:21 +00004711 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004712 len = 0;
4713 size = XML_PARSER_BUFFER_SIZE;
4714 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4715 if (buf == NULL) {
4716 xmlErrMemory(ctxt, NULL);
4717 return;
4718 }
Owen Taylor3473f882001-02-23 17:55:21 +00004719 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004720 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004721 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004722 if (q == 0)
4723 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004724 if (!IS_CHAR(q)) {
4725 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4726 "xmlParseComment: invalid xmlChar value %d\n",
4727 q);
4728 xmlFree (buf);
4729 return;
4730 }
Owen Taylor3473f882001-02-23 17:55:21 +00004731 NEXTL(ql);
4732 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004733 if (r == 0)
4734 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004735 if (!IS_CHAR(r)) {
4736 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4737 "xmlParseComment: invalid xmlChar value %d\n",
4738 q);
4739 xmlFree (buf);
4740 return;
4741 }
Owen Taylor3473f882001-02-23 17:55:21 +00004742 NEXTL(rl);
4743 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004744 if (cur == 0)
4745 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004746 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004747 ((cur != '>') ||
4748 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004749 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004750 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004751 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004752 if ((len > XML_MAX_TEXT_LENGTH) &&
4753 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4754 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4755 "Comment too big found", NULL);
4756 xmlFree (buf);
4757 return;
4758 }
Owen Taylor3473f882001-02-23 17:55:21 +00004759 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004760 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004761 size_t new_size;
4762
4763 new_size = size * 2;
4764 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004765 if (new_buf == NULL) {
4766 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004767 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004768 return;
4769 }
William M. Bracka3215c72004-07-31 16:24:01 +00004770 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004771 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004772 }
4773 COPY_BUF(ql,buf,len,q);
4774 q = r;
4775 ql = rl;
4776 r = cur;
4777 rl = l;
4778
4779 count++;
4780 if (count > 50) {
4781 GROW;
4782 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004783 if (ctxt->instate == XML_PARSER_EOF) {
4784 xmlFree(buf);
4785 return;
4786 }
Owen Taylor3473f882001-02-23 17:55:21 +00004787 }
4788 NEXTL(l);
4789 cur = CUR_CHAR(l);
4790 if (cur == 0) {
4791 SHRINK;
4792 GROW;
4793 cur = CUR_CHAR(l);
4794 }
4795 }
4796 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004797 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004798 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004799 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004800 } else if (!IS_CHAR(cur)) {
4801 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4802 "xmlParseComment: invalid xmlChar value %d\n",
4803 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004804 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004805 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004806 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004807 "Comment doesn't start and stop in the same"
4808 " entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004809 }
4810 NEXT;
4811 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4812 (!ctxt->disableSAX))
4813 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004814 }
Daniel Veillardda629342007-08-01 07:49:06 +00004815 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004816 return;
4817not_terminated:
4818 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4819 "Comment not terminated\n", NULL);
4820 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004821 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004822}
Daniel Veillardda629342007-08-01 07:49:06 +00004823
Daniel Veillard4c778d82005-01-23 17:37:44 +00004824/**
4825 * xmlParseComment:
4826 * @ctxt: an XML parser context
4827 *
4828 * Skip an XML (SGML) comment <!-- .... -->
4829 * The spec says that "For compatibility, the string "--" (double-hyphen)
4830 * must not occur within comments. "
4831 *
4832 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4833 */
4834void
4835xmlParseComment(xmlParserCtxtPtr ctxt) {
4836 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004837 size_t size = XML_PARSER_BUFFER_SIZE;
4838 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004839 xmlParserInputState state;
4840 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004841 size_t nbchar = 0;
4842 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004843 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004844
4845 /*
4846 * Check that there is a comment right here.
4847 */
4848 if ((RAW != '<') || (NXT(1) != '!') ||
4849 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004850 state = ctxt->instate;
4851 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004852 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004853 SKIP(4);
4854 SHRINK;
4855 GROW;
4856
4857 /*
4858 * Accelerated common case where input don't need to be
4859 * modified before passing it to the handler.
4860 */
4861 in = ctxt->input->cur;
4862 do {
4863 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004864 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004865 ctxt->input->line++; ctxt->input->col = 1;
4866 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004867 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004868 }
4869get_more:
4870 ccol = ctxt->input->col;
4871 while (((*in > '-') && (*in <= 0x7F)) ||
4872 ((*in >= 0x20) && (*in < '-')) ||
4873 (*in == 0x09)) {
4874 in++;
4875 ccol++;
4876 }
4877 ctxt->input->col = ccol;
4878 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004879 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004880 ctxt->input->line++; ctxt->input->col = 1;
4881 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004882 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004883 goto get_more;
4884 }
4885 nbchar = in - ctxt->input->cur;
4886 /*
4887 * save current set of data
4888 */
4889 if (nbchar > 0) {
4890 if ((ctxt->sax != NULL) &&
4891 (ctxt->sax->comment != NULL)) {
4892 if (buf == NULL) {
4893 if ((*in == '-') && (in[1] == '-'))
4894 size = nbchar + 1;
4895 else
4896 size = XML_PARSER_BUFFER_SIZE + nbchar;
4897 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4898 if (buf == NULL) {
4899 xmlErrMemory(ctxt, NULL);
4900 ctxt->instate = state;
4901 return;
4902 }
4903 len = 0;
4904 } else if (len + nbchar + 1 >= size) {
4905 xmlChar *new_buf;
4906 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4907 new_buf = (xmlChar *) xmlRealloc(buf,
4908 size * sizeof(xmlChar));
4909 if (new_buf == NULL) {
4910 xmlFree (buf);
4911 xmlErrMemory(ctxt, NULL);
4912 ctxt->instate = state;
4913 return;
4914 }
4915 buf = new_buf;
4916 }
4917 memcpy(&buf[len], ctxt->input->cur, nbchar);
4918 len += nbchar;
4919 buf[len] = 0;
4920 }
4921 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004922 if ((len > XML_MAX_TEXT_LENGTH) &&
4923 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4924 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4925 "Comment too big found", NULL);
4926 xmlFree (buf);
4927 return;
4928 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004929 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004930 if (*in == 0xA) {
4931 in++;
4932 ctxt->input->line++; ctxt->input->col = 1;
4933 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004934 if (*in == 0xD) {
4935 in++;
4936 if (*in == 0xA) {
4937 ctxt->input->cur = in;
4938 in++;
4939 ctxt->input->line++; ctxt->input->col = 1;
4940 continue; /* while */
4941 }
4942 in--;
4943 }
4944 SHRINK;
4945 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004946 if (ctxt->instate == XML_PARSER_EOF) {
4947 xmlFree(buf);
4948 return;
4949 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004950 in = ctxt->input->cur;
4951 if (*in == '-') {
4952 if (in[1] == '-') {
4953 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004954 if (ctxt->input->id != inputid) {
4955 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004956 "comment doesn't start and stop in the"
4957 " same entity\n");
Daniel Veillard051d52c2008-07-29 16:44:59 +00004958 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004959 SKIP(3);
4960 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4961 (!ctxt->disableSAX)) {
4962 if (buf != NULL)
4963 ctxt->sax->comment(ctxt->userData, buf);
4964 else
4965 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4966 }
4967 if (buf != NULL)
4968 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08004969 if (ctxt->instate != XML_PARSER_EOF)
4970 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004971 return;
4972 }
Bryan Henderson8658d272012-05-08 16:39:05 +08004973 if (buf != NULL) {
4974 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4975 "Double hyphen within comment: "
4976 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00004977 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08004978 } else
4979 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4980 "Double hyphen within comment\n", NULL);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07004981 if (ctxt->instate == XML_PARSER_EOF) {
4982 xmlFree(buf);
4983 return;
4984 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004985 in++;
4986 ctxt->input->col++;
4987 }
4988 in++;
4989 ctxt->input->col++;
4990 goto get_more;
4991 }
4992 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4993 xmlParseCommentComplex(ctxt, buf, len, size);
4994 ctxt->instate = state;
4995 return;
4996}
4997
Owen Taylor3473f882001-02-23 17:55:21 +00004998
4999/**
5000 * xmlParsePITarget:
5001 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005002 *
Owen Taylor3473f882001-02-23 17:55:21 +00005003 * parse the name of a PI
5004 *
5005 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5006 *
5007 * Returns the PITarget name or NULL
5008 */
5009
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005010const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005011xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005012 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005013
5014 name = xmlParseName(ctxt);
5015 if ((name != NULL) &&
5016 ((name[0] == 'x') || (name[0] == 'X')) &&
5017 ((name[1] == 'm') || (name[1] == 'M')) &&
5018 ((name[2] == 'l') || (name[2] == 'L'))) {
5019 int i;
5020 if ((name[0] == 'x') && (name[1] == 'm') &&
5021 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005022 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005023 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005024 return(name);
5025 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005026 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005027 return(name);
5028 }
5029 for (i = 0;;i++) {
5030 if (xmlW3CPIs[i] == NULL) break;
5031 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5032 return(name);
5033 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005034 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5035 "xmlParsePITarget: invalid name prefix 'xml'\n",
5036 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005037 }
Daniel Veillard37334572008-07-31 08:20:02 +00005038 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005039 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005040 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005041 }
Owen Taylor3473f882001-02-23 17:55:21 +00005042 return(name);
5043}
5044
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must consist of the word "catalog", an '=' sign and a URL
 * enclosed in single or double quotes, with optional blanks around
 * each part and nothing but blanks after the closing quote. A syntax
 * error produces a warning, except when the character following
 * "catalog" (after blanks) is not '=': then the PI is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != '=') {
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* the opening quote also determines the closing one */
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5106
Owen Taylor3473f882001-02-23 17:55:21 +00005107/**
5108 * xmlParsePI:
5109 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005110 *
Owen Taylor3473f882001-02-23 17:55:21 +00005111 * parse an XML Processing Instruction.
5112 *
5113 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5114 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07005115 * The processing is transferred to SAX once parsed.
Owen Taylor3473f882001-02-23 17:55:21 +00005116 */
5117
5118void
5119xmlParsePI(xmlParserCtxtPtr ctxt) {
5120 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005121 size_t len = 0;
5122 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005123 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005124 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005125 xmlParserInputState state;
5126 int count = 0;
5127
5128 if ((RAW == '<') && (NXT(1) == '?')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005129 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005130 state = ctxt->instate;
5131 ctxt->instate = XML_PARSER_PI;
5132 /*
5133 * this is a Processing Instruction.
5134 */
5135 SKIP(2);
5136 SHRINK;
5137
5138 /*
5139 * Parse the target name and check for special support like
5140 * namespace.
5141 */
5142 target = xmlParsePITarget(ctxt);
5143 if (target != NULL) {
5144 if ((RAW == '?') && (NXT(1) == '>')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005145 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005146 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005147 "PI declaration doesn't start and stop in"
5148 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005149 }
5150 SKIP(2);
5151
5152 /*
5153 * SAX: PI detected.
5154 */
5155 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5156 (ctxt->sax->processingInstruction != NULL))
5157 ctxt->sax->processingInstruction(ctxt->userData,
5158 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005159 if (ctxt->instate != XML_PARSER_EOF)
5160 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005161 return;
5162 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005163 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005164 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005165 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005166 ctxt->instate = state;
5167 return;
5168 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005169 if (SKIP_BLANKS == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005170 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5171 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005172 }
Owen Taylor3473f882001-02-23 17:55:21 +00005173 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005174 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005175 ((cur != '?') || (NXT(1) != '>'))) {
5176 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005177 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005178 size_t new_size = size * 2;
5179 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005180 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005181 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005182 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005183 ctxt->instate = state;
5184 return;
5185 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005186 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005187 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005188 }
5189 count++;
5190 if (count > 50) {
5191 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005192 if (ctxt->instate == XML_PARSER_EOF) {
5193 xmlFree(buf);
5194 return;
5195 }
Owen Taylor3473f882001-02-23 17:55:21 +00005196 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005197 if ((len > XML_MAX_TEXT_LENGTH) &&
5198 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5199 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5200 "PI %s too big found", target);
5201 xmlFree(buf);
5202 ctxt->instate = state;
5203 return;
5204 }
Owen Taylor3473f882001-02-23 17:55:21 +00005205 }
5206 COPY_BUF(l,buf,len,cur);
5207 NEXTL(l);
5208 cur = CUR_CHAR(l);
5209 if (cur == 0) {
5210 SHRINK;
5211 GROW;
5212 cur = CUR_CHAR(l);
5213 }
5214 }
Daniel Veillard51304812012-07-19 20:34:26 +08005215 if ((len > XML_MAX_TEXT_LENGTH) &&
5216 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5217 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5218 "PI %s too big found", target);
5219 xmlFree(buf);
5220 ctxt->instate = state;
5221 return;
5222 }
Owen Taylor3473f882001-02-23 17:55:21 +00005223 buf[len] = 0;
5224 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005225 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5226 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005227 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005228 if (inputid != ctxt->input->id) {
5229 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5230 "PI declaration doesn't start and stop in"
5231 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005232 }
5233 SKIP(2);
5234
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005235#ifdef LIBXML_CATALOG_ENABLED
5236 if (((state == XML_PARSER_MISC) ||
5237 (state == XML_PARSER_START)) &&
5238 (xmlStrEqual(target, XML_CATALOG_PI))) {
5239 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5240 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5241 (allow == XML_CATA_ALLOW_ALL))
5242 xmlParseCatalogPI(ctxt, buf);
5243 }
5244#endif
5245
5246
Owen Taylor3473f882001-02-23 17:55:21 +00005247 /*
5248 * SAX: PI detected.
5249 */
5250 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5251 (ctxt->sax->processingInstruction != NULL))
5252 ctxt->sax->processingInstruction(ctxt->userData,
5253 target, buf);
5254 }
5255 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005256 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005257 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005258 }
Chris Evans77404b82011-12-14 16:18:25 +08005259 if (ctxt->instate != XML_PARSER_EOF)
5260 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005261 }
5262}
5263
5264/**
5265 * xmlParseNotationDecl:
5266 * @ctxt: an XML parser context
5267 *
5268 * parse a notation declaration
5269 *
5270 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5271 *
5272 * Hence there is actually 3 choices:
5273 * 'PUBLIC' S PubidLiteral
5274 * 'PUBLIC' S PubidLiteral S SystemLiteral
5275 * and 'SYSTEM' S SystemLiteral
5276 *
5277 * See the NOTE on xmlParseExternalID().
5278 */
5279
5280void
5281xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005282 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005283 xmlChar *Pubid;
5284 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005285
Daniel Veillarda07050d2003-10-19 14:46:32 +00005286 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005287 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005288 SHRINK;
5289 SKIP(10);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005290 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005291 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5292 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005293 return;
5294 }
Owen Taylor3473f882001-02-23 17:55:21 +00005295
Daniel Veillard76d66f42001-05-16 21:05:17 +00005296 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005297 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005298 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005299 return;
5300 }
Daniel Veillard37334572008-07-31 08:20:02 +00005301 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005302 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005303 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005304 name, NULL, NULL);
5305 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005306 if (SKIP_BLANKS == 0) {
5307 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5308 "Space required after the NOTATION name'\n");
5309 return;
5310 }
Owen Taylor3473f882001-02-23 17:55:21 +00005311
5312 /*
5313 * Parse the IDs.
5314 */
5315 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5316 SKIP_BLANKS;
5317
5318 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005319 if (inputid != ctxt->input->id) {
5320 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5321 "Notation declaration doesn't start and stop"
5322 " in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005323 }
5324 NEXT;
5325 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5326 (ctxt->sax->notationDecl != NULL))
5327 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5328 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005329 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005330 }
Owen Taylor3473f882001-02-23 17:55:21 +00005331 if (Systemid != NULL) xmlFree(Systemid);
5332 if (Pubid != NULL) xmlFree(Pubid);
5333 }
5334}
5335
5336/**
5337 * xmlParseEntityDecl:
5338 * @ctxt: an XML parser context
5339 *
5340 * parse <!ENTITY declarations
5341 *
5342 * [70] EntityDecl ::= GEDecl | PEDecl
5343 *
5344 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5345 *
5346 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5347 *
5348 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5349 *
5350 * [74] PEDef ::= EntityValue | ExternalID
5351 *
5352 * [76] NDataDecl ::= S 'NDATA' S Name
5353 *
5354 * [ VC: Notation Declared ]
5355 * The Name must match the declared name of a notation.
5356 */
5357
5358void
5359xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005360 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005361 xmlChar *value = NULL;
5362 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005363 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005364 int isParameter = 0;
5365 xmlChar *orig = NULL;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005366
Daniel Veillard4c778d82005-01-23 17:37:44 +00005367 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005368 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005369 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005370 SHRINK;
5371 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005372 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005373 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5374 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005375 }
Owen Taylor3473f882001-02-23 17:55:21 +00005376
5377 if (RAW == '%') {
5378 NEXT;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005379 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005380 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
David Kilzer4472c3a2016-05-13 15:13:17 +08005381 "Space required after '%%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005382 }
Owen Taylor3473f882001-02-23 17:55:21 +00005383 isParameter = 1;
5384 }
5385
Daniel Veillard76d66f42001-05-16 21:05:17 +00005386 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005387 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005388 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5389 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005390 return;
5391 }
Daniel Veillard37334572008-07-31 08:20:02 +00005392 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005393 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005394 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005395 name, NULL, NULL);
5396 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005397 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005398 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5399 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005400 }
Owen Taylor3473f882001-02-23 17:55:21 +00005401
Daniel Veillardf5582f12002-06-11 10:08:16 +00005402 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005403 /*
5404 * handle the various case of definitions...
5405 */
5406 if (isParameter) {
5407 if ((RAW == '"') || (RAW == '\'')) {
5408 value = xmlParseEntityValue(ctxt, &orig);
5409 if (value) {
5410 if ((ctxt->sax != NULL) &&
5411 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5412 ctxt->sax->entityDecl(ctxt->userData, name,
5413 XML_INTERNAL_PARAMETER_ENTITY,
5414 NULL, NULL, value);
5415 }
5416 } else {
5417 URI = xmlParseExternalID(ctxt, &literal, 1);
5418 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005419 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 }
5421 if (URI) {
5422 xmlURIPtr uri;
5423
5424 uri = xmlParseURI((const char *) URI);
5425 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005426 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5427 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005428 /*
5429 * This really ought to be a well formedness error
5430 * but the XML Core WG decided otherwise c.f. issue
5431 * E26 of the XML erratas.
5432 */
Owen Taylor3473f882001-02-23 17:55:21 +00005433 } else {
5434 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005435 /*
5436 * Okay this is foolish to block those but not
5437 * invalid URIs.
5438 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005439 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005440 } else {
5441 if ((ctxt->sax != NULL) &&
5442 (!ctxt->disableSAX) &&
5443 (ctxt->sax->entityDecl != NULL))
5444 ctxt->sax->entityDecl(ctxt->userData, name,
5445 XML_EXTERNAL_PARAMETER_ENTITY,
5446 literal, URI, NULL);
5447 }
5448 xmlFreeURI(uri);
5449 }
5450 }
5451 }
5452 } else {
5453 if ((RAW == '"') || (RAW == '\'')) {
5454 value = xmlParseEntityValue(ctxt, &orig);
5455 if ((ctxt->sax != NULL) &&
5456 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5457 ctxt->sax->entityDecl(ctxt->userData, name,
5458 XML_INTERNAL_GENERAL_ENTITY,
5459 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005460 /*
5461 * For expat compatibility in SAX mode.
5462 */
5463 if ((ctxt->myDoc == NULL) ||
5464 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5465 if (ctxt->myDoc == NULL) {
5466 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005467 if (ctxt->myDoc == NULL) {
5468 xmlErrMemory(ctxt, "New Doc failed");
5469 return;
5470 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005471 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005472 }
5473 if (ctxt->myDoc->intSubset == NULL)
5474 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5475 BAD_CAST "fake", NULL, NULL);
5476
Daniel Veillard1af9a412003-08-20 22:54:39 +00005477 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5478 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005479 }
Owen Taylor3473f882001-02-23 17:55:21 +00005480 } else {
5481 URI = xmlParseExternalID(ctxt, &literal, 1);
5482 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005483 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005484 }
5485 if (URI) {
5486 xmlURIPtr uri;
5487
5488 uri = xmlParseURI((const char *)URI);
5489 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005490 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5491 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005492 /*
5493 * This really ought to be a well formedness error
5494 * but the XML Core WG decided otherwise c.f. issue
5495 * E26 of the XML erratas.
5496 */
Owen Taylor3473f882001-02-23 17:55:21 +00005497 } else {
5498 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005499 /*
5500 * Okay this is foolish to block those but not
5501 * invalid URIs.
5502 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005503 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005504 }
5505 xmlFreeURI(uri);
5506 }
5507 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005508 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005509 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5510 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005511 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005512 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005513 SKIP(5);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005514 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005515 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5516 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005517 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005518 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005519 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5520 (ctxt->sax->unparsedEntityDecl != NULL))
5521 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5522 literal, URI, ndata);
5523 } else {
5524 if ((ctxt->sax != NULL) &&
5525 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5526 ctxt->sax->entityDecl(ctxt->userData, name,
5527 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5528 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005529 /*
5530 * For expat compatibility in SAX mode.
Haibo Huangcfd91dc2020-07-30 23:01:33 -07005531 * assuming the entity replacement was asked for
Daniel Veillard5997aca2002-03-18 18:36:20 +00005532 */
5533 if ((ctxt->replaceEntities != 0) &&
5534 ((ctxt->myDoc == NULL) ||
5535 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5536 if (ctxt->myDoc == NULL) {
5537 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005538 if (ctxt->myDoc == NULL) {
5539 xmlErrMemory(ctxt, "New Doc failed");
5540 return;
5541 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005542 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005543 }
5544
5545 if (ctxt->myDoc->intSubset == NULL)
5546 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5547 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005548 xmlSAX2EntityDecl(ctxt, name,
5549 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5550 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005551 }
Owen Taylor3473f882001-02-23 17:55:21 +00005552 }
5553 }
5554 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005555 if (ctxt->instate == XML_PARSER_EOF)
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005556 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00005557 SKIP_BLANKS;
5558 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005559 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005560 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005561 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005562 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005563 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005564 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005565 "Entity declaration doesn't start and stop in"
5566 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005567 }
5568 NEXT;
5569 }
5570 if (orig != NULL) {
5571 /*
5572 * Ugly mechanism to save the raw entity value.
5573 */
5574 xmlEntityPtr cur = NULL;
5575
5576 if (isParameter) {
5577 if ((ctxt->sax != NULL) &&
5578 (ctxt->sax->getParameterEntity != NULL))
5579 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5580 } else {
5581 if ((ctxt->sax != NULL) &&
5582 (ctxt->sax->getEntity != NULL))
5583 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005584 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005585 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005586 }
Owen Taylor3473f882001-02-23 17:55:21 +00005587 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005588 if ((cur != NULL) && (cur->orig == NULL)) {
5589 cur->orig = orig;
5590 orig = NULL;
5591 }
Owen Taylor3473f882001-02-23 17:55:21 +00005592 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005593
5594done:
Owen Taylor3473f882001-02-23 17:55:21 +00005595 if (value != NULL) xmlFree(value);
5596 if (URI != NULL) xmlFree(URI);
5597 if (literal != NULL) xmlFree(literal);
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005598 if (orig != NULL) xmlFree(orig);
Owen Taylor3473f882001-02-23 17:55:21 +00005599 }
5600}
5601
5602/**
5603 * xmlParseDefaultDecl:
5604 * @ctxt: an XML parser context
5605 * @value: Receive a possible fixed default value for the attribute
5606 *
5607 * Parse an attribute default declaration
5608 *
5609 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5610 *
5611 * [ VC: Required Attribute ]
5612 * if the default declaration is the keyword #REQUIRED, then the
5613 * attribute must be specified for all elements of the type in the
5614 * attribute-list declaration.
5615 *
5616 * [ VC: Attribute Default Legal ]
5617 * The declared default value must meet the lexical constraints of
5618 * the declared attribute type c.f. xmlValidateAttributeDecl()
5619 *
5620 * [ VC: Fixed Attribute Default ]
5621 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005622 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005623 *
5624 * [ WFC: No < in Attribute Values ]
5625 * handled in xmlParseAttValue()
5626 *
5627 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005628 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005629 */
5630
5631int
5632xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5633 int val;
5634 xmlChar *ret;
5635
5636 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005637 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005638 SKIP(9);
5639 return(XML_ATTRIBUTE_REQUIRED);
5640 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005641 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005642 SKIP(8);
5643 return(XML_ATTRIBUTE_IMPLIED);
5644 }
5645 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005646 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005647 SKIP(6);
5648 val = XML_ATTRIBUTE_FIXED;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005649 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005650 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5651 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005652 }
Owen Taylor3473f882001-02-23 17:55:21 +00005653 }
5654 ret = xmlParseAttValue(ctxt);
5655 ctxt->instate = XML_PARSER_DTD;
5656 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005657 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005658 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005659 } else
5660 *value = ret;
5661 return(val);
5662}
5663
5664/**
5665 * xmlParseNotationType:
5666 * @ctxt: an XML parser context
5667 *
5668 * parse an Notation attribute type.
5669 *
5670 * Note: the leading 'NOTATION' S part has already being parsed...
5671 *
5672 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5673 *
5674 * [ VC: Notation Attributes ]
5675 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005676 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005677 *
5678 * Returns: the notation attribute tree built while parsing
5679 */
5680
5681xmlEnumerationPtr
5682xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005683 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005684 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005685
5686 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005687 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005688 return(NULL);
5689 }
5690 SHRINK;
5691 do {
5692 NEXT;
5693 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005694 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005695 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005696 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5697 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005698 xmlFreeEnumeration(ret);
5699 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005700 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005701 tmp = ret;
5702 while (tmp != NULL) {
5703 if (xmlStrEqual(name, tmp->name)) {
5704 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5705 "standalone: attribute notation value token %s duplicated\n",
5706 name, NULL);
5707 if (!xmlDictOwns(ctxt->dict, name))
5708 xmlFree((xmlChar *) name);
5709 break;
5710 }
5711 tmp = tmp->next;
5712 }
5713 if (tmp == NULL) {
5714 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005715 if (cur == NULL) {
5716 xmlFreeEnumeration(ret);
5717 return(NULL);
5718 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005719 if (last == NULL) ret = last = cur;
5720 else {
5721 last->next = cur;
5722 last = cur;
5723 }
Owen Taylor3473f882001-02-23 17:55:21 +00005724 }
5725 SKIP_BLANKS;
5726 } while (RAW == '|');
5727 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005728 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005729 xmlFreeEnumeration(ret);
5730 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005731 }
5732 NEXT;
5733 return(ret);
5734}
5735
5736/**
5737 * xmlParseEnumerationType:
5738 * @ctxt: an XML parser context
5739 *
5740 * parse an Enumeration attribute type.
5741 *
5742 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5743 *
5744 * [ VC: Enumeration ]
5745 * Values of this type must match one of the Nmtoken tokens in
5746 * the declaration
5747 *
5748 * Returns: the enumeration attribute tree built while parsing
5749 */
5750
5751xmlEnumerationPtr
5752xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5753 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005754 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005755
5756 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005757 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005758 return(NULL);
5759 }
5760 SHRINK;
5761 do {
5762 NEXT;
5763 SKIP_BLANKS;
5764 name = xmlParseNmtoken(ctxt);
5765 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005766 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005767 return(ret);
5768 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005769 tmp = ret;
5770 while (tmp != NULL) {
5771 if (xmlStrEqual(name, tmp->name)) {
5772 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5773 "standalone: attribute enumeration value token %s duplicated\n",
5774 name, NULL);
5775 if (!xmlDictOwns(ctxt->dict, name))
5776 xmlFree(name);
5777 break;
5778 }
5779 tmp = tmp->next;
5780 }
5781 if (tmp == NULL) {
5782 cur = xmlCreateEnumeration(name);
5783 if (!xmlDictOwns(ctxt->dict, name))
5784 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005785 if (cur == NULL) {
5786 xmlFreeEnumeration(ret);
5787 return(NULL);
5788 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005789 if (last == NULL) ret = last = cur;
5790 else {
5791 last->next = cur;
5792 last = cur;
5793 }
Owen Taylor3473f882001-02-23 17:55:21 +00005794 }
5795 SKIP_BLANKS;
5796 } while (RAW == '|');
5797 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005798 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005799 return(ret);
5800 }
5801 NEXT;
5802 return(ret);
5803}
5804
5805/**
5806 * xmlParseEnumeratedType:
5807 * @ctxt: an XML parser context
5808 * @tree: the enumeration tree built while parsing
5809 *
5810 * parse an Enumerated attribute type.
5811 *
5812 * [57] EnumeratedType ::= NotationType | Enumeration
5813 *
5814 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5815 *
5816 *
5817 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5818 */
5819
5820int
5821xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005822 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005823 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005824 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005825 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5826 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005827 return(0);
5828 }
Owen Taylor3473f882001-02-23 17:55:21 +00005829 *tree = xmlParseNotationType(ctxt);
5830 if (*tree == NULL) return(0);
5831 return(XML_ATTRIBUTE_NOTATION);
5832 }
5833 *tree = xmlParseEnumerationType(ctxt);
5834 if (*tree == NULL) return(0);
5835 return(XML_ATTRIBUTE_ENUMERATION);
5836}
5837
5838/**
5839 * xmlParseAttributeType:
5840 * @ctxt: an XML parser context
5841 * @tree: the enumeration tree built while parsing
5842 *
5843 * parse the Attribute list def for an element
5844 *
5845 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5846 *
5847 * [55] StringType ::= 'CDATA'
5848 *
5849 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5850 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5851 *
5852 * Validity constraints for attribute values syntax are checked in
5853 * xmlValidateAttributeValue()
5854 *
5855 * [ VC: ID ]
5856 * Values of type ID must match the Name production. A name must not
5857 * appear more than once in an XML document as a value of this type;
5858 * i.e., ID values must uniquely identify the elements which bear them.
5859 *
5860 * [ VC: One ID per Element Type ]
5861 * No element type may have more than one ID attribute specified.
5862 *
5863 * [ VC: ID Attribute Default ]
5864 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5865 *
5866 * [ VC: IDREF ]
5867 * Values of type IDREF must match the Name production, and values
5868 * of type IDREFS must match Names; each IDREF Name must match the value
5869 * of an ID attribute on some element in the XML document; i.e. IDREF
5870 * values must match the value of some ID attribute.
5871 *
5872 * [ VC: Entity Name ]
5873 * Values of type ENTITY must match the Name production, values
5874 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005875 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005876 *
5877 * [ VC: Name Token ]
5878 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005879 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005880 *
5881 * Returns the attribute type
5882 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005883int
Owen Taylor3473f882001-02-23 17:55:21 +00005884xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5885 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005886 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005887 SKIP(5);
5888 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005889 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005890 SKIP(6);
5891 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005892 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005893 SKIP(5);
5894 return(XML_ATTRIBUTE_IDREF);
5895 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5896 SKIP(2);
5897 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005898 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005899 SKIP(6);
5900 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005901 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005902 SKIP(8);
5903 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005904 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005905 SKIP(8);
5906 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005907 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005908 SKIP(7);
5909 return(XML_ATTRIBUTE_NMTOKEN);
5910 }
5911 return(xmlParseEnumeratedType(ctxt, tree));
5912}
5913
5914/**
5915 * xmlParseAttributeListDecl:
5916 * @ctxt: an XML parser context
5917 *
5918 * : parse the Attribute list def for an element
5919 *
5920 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5921 *
5922 * [53] AttDef ::= S Name S AttType S DefaultDecl
5923 *
5924 */
5925void
5926xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005927 const xmlChar *elemName;
5928 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005929 xmlEnumerationPtr tree;
5930
Daniel Veillarda07050d2003-10-19 14:46:32 +00005931 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005932 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005933
5934 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005935 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005936 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005937 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005938 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005939 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005940 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005941 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5942 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005943 return;
5944 }
5945 SKIP_BLANKS;
5946 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005947 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005948 int type;
5949 int def;
5950 xmlChar *defaultValue = NULL;
5951
5952 GROW;
5953 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005954 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005955 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005956 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5957 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005958 break;
5959 }
5960 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005961 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005962 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005963 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005964 break;
5965 }
Owen Taylor3473f882001-02-23 17:55:21 +00005966
5967 type = xmlParseAttributeType(ctxt, &tree);
5968 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005969 break;
5970 }
5971
5972 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005973 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005974 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5975 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005976 if (tree != NULL)
5977 xmlFreeEnumeration(tree);
5978 break;
5979 }
Owen Taylor3473f882001-02-23 17:55:21 +00005980
5981 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5982 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005983 if (defaultValue != NULL)
5984 xmlFree(defaultValue);
5985 if (tree != NULL)
5986 xmlFreeEnumeration(tree);
5987 break;
5988 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005989 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5990 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005991
5992 GROW;
5993 if (RAW != '>') {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005994 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005995 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005996 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005997 if (defaultValue != NULL)
5998 xmlFree(defaultValue);
5999 if (tree != NULL)
6000 xmlFreeEnumeration(tree);
6001 break;
6002 }
Owen Taylor3473f882001-02-23 17:55:21 +00006003 }
Owen Taylor3473f882001-02-23 17:55:21 +00006004 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6005 (ctxt->sax->attributeDecl != NULL))
6006 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6007 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006008 else if (tree != NULL)
6009 xmlFreeEnumeration(tree);
6010
6011 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006012 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006013 (def != XML_ATTRIBUTE_REQUIRED)) {
6014 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6015 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006016 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006017 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6018 }
Owen Taylor3473f882001-02-23 17:55:21 +00006019 if (defaultValue != NULL)
6020 xmlFree(defaultValue);
6021 GROW;
6022 }
6023 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006024 if (inputid != ctxt->input->id) {
6025 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6026 "Attribute list declaration doesn't start and"
6027 " stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006028 }
6029 NEXT;
6030 }
Owen Taylor3473f882001-02-23 17:55:21 +00006031 }
6032}
6033
6034/**
6035 * xmlParseElementMixedContentDecl:
6036 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006037 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006038 *
6039 * parse the declaration for a Mixed Element content
6040 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006041 *
Owen Taylor3473f882001-02-23 17:55:21 +00006042 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6043 * '(' S? '#PCDATA' S? ')'
6044 *
6045 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6046 *
6047 * [ VC: No Duplicate Types ]
6048 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006049 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006050 *
6051 * returns: the list of the xmlElementContentPtr describing the element choices
6052 */
6053xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006054xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006055 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006056 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006057
6058 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006059 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006060 SKIP(7);
6061 SKIP_BLANKS;
6062 SHRINK;
6063 if (RAW == ')') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006064 if (ctxt->input->id != inputchk) {
6065 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6066 "Element content declaration doesn't start and"
6067 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006068 }
Owen Taylor3473f882001-02-23 17:55:21 +00006069 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006070 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006071 if (ret == NULL)
6072 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006073 if (RAW == '*') {
6074 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6075 NEXT;
6076 }
6077 return(ret);
6078 }
6079 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006080 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006081 if (ret == NULL) return(NULL);
6082 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006083 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006084 NEXT;
6085 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006086 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006087 if (ret == NULL) return(NULL);
6088 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006089 if (cur != NULL)
6090 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006091 cur = ret;
6092 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006093 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006094 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006095 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006096 if (n->c1 != NULL)
6097 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006098 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006099 if (n != NULL)
6100 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006101 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006102 }
6103 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006104 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006105 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006106 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006107 "xmlParseElementMixedContentDecl : Name expected\n");
Nick Wellnhofer8627e4e2017-05-23 18:11:08 +02006108 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006109 return(NULL);
6110 }
6111 SKIP_BLANKS;
6112 GROW;
6113 }
6114 if ((RAW == ')') && (NXT(1) == '*')) {
6115 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006116 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006117 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006118 if (cur->c2 != NULL)
6119 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006120 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006121 if (ret != NULL)
6122 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006123 if (ctxt->input->id != inputchk) {
6124 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6125 "Element content declaration doesn't start and"
6126 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006127 }
Owen Taylor3473f882001-02-23 17:55:21 +00006128 SKIP(2);
6129 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006130 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006131 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006132 return(NULL);
6133 }
6134
6135 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006136 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006137 }
6138 return(ret);
6139}
6140
6141/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006142 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006143 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006144 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006145 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006146 *
6147 * parse the declaration for a Mixed Element content
6148 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006149 *
Owen Taylor3473f882001-02-23 17:55:21 +00006150 *
6151 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6152 *
6153 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6154 *
6155 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6156 *
6157 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6158 *
6159 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6160 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006161 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006162 * opening or closing parentheses in a choice, seq, or Mixed
6163 * construct is contained in the replacement text for a parameter
6164 * entity, both must be contained in the same replacement text. For
6165 * interoperability, if a parameter-entity reference appears in a
6166 * choice, seq, or Mixed construct, its replacement text should not
6167 * be empty, and neither the first nor last non-blank character of
6168 * the replacement text should be a connector (| or ,).
6169 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006170 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006171 * hierarchy.
6172 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006173static xmlElementContentPtr
6174xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6175 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006176 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006177 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006178 xmlChar type = 0;
6179
Daniel Veillard489f9672009-08-10 16:49:30 +02006180 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6181 (depth > 2048)) {
6182 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6183"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6184 depth);
6185 return(NULL);
6186 }
Owen Taylor3473f882001-02-23 17:55:21 +00006187 SKIP_BLANKS;
6188 GROW;
6189 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006190 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006191
Owen Taylor3473f882001-02-23 17:55:21 +00006192 /* Recurse on first child */
6193 NEXT;
6194 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006195 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6196 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006197 SKIP_BLANKS;
6198 GROW;
6199 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006200 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006201 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006202 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006203 return(NULL);
6204 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006205 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006206 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006207 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006208 return(NULL);
6209 }
Owen Taylor3473f882001-02-23 17:55:21 +00006210 GROW;
6211 if (RAW == '?') {
6212 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6213 NEXT;
6214 } else if (RAW == '*') {
6215 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6216 NEXT;
6217 } else if (RAW == '+') {
6218 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6219 NEXT;
6220 } else {
6221 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6222 }
Owen Taylor3473f882001-02-23 17:55:21 +00006223 GROW;
6224 }
6225 SKIP_BLANKS;
6226 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006227 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006228 /*
6229 * Each loop we parse one separator and one element.
6230 */
6231 if (RAW == ',') {
6232 if (type == 0) type = CUR;
6233
6234 /*
6235 * Detect "Name | Name , Name" error
6236 */
6237 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006238 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006239 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006240 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006241 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006242 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006243 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006244 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006245 return(NULL);
6246 }
6247 NEXT;
6248
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006249 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006250 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006251 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006252 xmlFreeDocElementContent(ctxt->myDoc, last);
6253 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006254 return(NULL);
6255 }
6256 if (last == NULL) {
6257 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006258 if (ret != NULL)
6259 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006260 ret = cur = op;
6261 } else {
6262 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006263 if (op != NULL)
6264 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006265 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006266 if (last != NULL)
6267 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006268 cur =op;
6269 last = NULL;
6270 }
6271 } else if (RAW == '|') {
6272 if (type == 0) type = CUR;
6273
6274 /*
6275 * Detect "Name , Name | Name" error
6276 */
6277 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006278 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006279 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006280 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006281 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006282 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006283 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006284 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006285 return(NULL);
6286 }
6287 NEXT;
6288
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006289 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006290 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006291 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006292 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006293 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006294 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006295 return(NULL);
6296 }
6297 if (last == NULL) {
6298 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006299 if (ret != NULL)
6300 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006301 ret = cur = op;
6302 } else {
6303 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006304 if (op != NULL)
6305 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006306 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006307 if (last != NULL)
6308 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006309 cur =op;
6310 last = NULL;
6311 }
6312 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006313 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006314 if ((last != NULL) && (last != ret))
6315 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006316 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006317 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006318 return(NULL);
6319 }
6320 GROW;
6321 SKIP_BLANKS;
6322 GROW;
6323 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006324 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006325 /* Recurse on second child */
6326 NEXT;
6327 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006328 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6329 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006330 SKIP_BLANKS;
6331 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006332 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006333 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006334 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006335 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006336 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006337 return(NULL);
6338 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006339 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006340 if (last == NULL) {
6341 if (ret != NULL)
6342 xmlFreeDocElementContent(ctxt->myDoc, ret);
6343 return(NULL);
6344 }
Owen Taylor3473f882001-02-23 17:55:21 +00006345 if (RAW == '?') {
6346 last->ocur = XML_ELEMENT_CONTENT_OPT;
6347 NEXT;
6348 } else if (RAW == '*') {
6349 last->ocur = XML_ELEMENT_CONTENT_MULT;
6350 NEXT;
6351 } else if (RAW == '+') {
6352 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6353 NEXT;
6354 } else {
6355 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6356 }
6357 }
6358 SKIP_BLANKS;
6359 GROW;
6360 }
6361 if ((cur != NULL) && (last != NULL)) {
6362 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006363 if (last != NULL)
6364 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006365 }
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006366 if (ctxt->input->id != inputchk) {
6367 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6368 "Element content declaration doesn't start and stop in"
6369 " the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006370 }
Owen Taylor3473f882001-02-23 17:55:21 +00006371 NEXT;
6372 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006373 if (ret != NULL) {
6374 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6375 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6376 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6377 else
6378 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6379 }
Owen Taylor3473f882001-02-23 17:55:21 +00006380 NEXT;
6381 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006382 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006383 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006384 cur = ret;
6385 /*
6386 * Some normalization:
6387 * (a | b* | c?)* == (a | b | c)*
6388 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006389 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006390 if ((cur->c1 != NULL) &&
6391 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6392 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6393 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6394 if ((cur->c2 != NULL) &&
6395 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6396 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6397 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6398 cur = cur->c2;
6399 }
6400 }
Owen Taylor3473f882001-02-23 17:55:21 +00006401 NEXT;
6402 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006403 if (ret != NULL) {
6404 int found = 0;
6405
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006406 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6407 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6408 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006409 else
6410 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006411 /*
6412 * Some normalization:
6413 * (a | b*)+ == (a | b)*
6414 * (a | b?)+ == (a | b)*
6415 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006416 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006417 if ((cur->c1 != NULL) &&
6418 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6419 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6420 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6421 found = 1;
6422 }
6423 if ((cur->c2 != NULL) &&
6424 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6425 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6426 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6427 found = 1;
6428 }
6429 cur = cur->c2;
6430 }
6431 if (found)
6432 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6433 }
Owen Taylor3473f882001-02-23 17:55:21 +00006434 NEXT;
6435 }
6436 return(ret);
6437}
6438
6439/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006440 * xmlParseElementChildrenContentDecl:
6441 * @ctxt: an XML parser context
6442 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006443 *
6444 * parse the declaration for a Mixed Element content
6445 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6446 *
6447 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6448 *
6449 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6450 *
6451 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6452 *
6453 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6454 *
6455 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6456 * TODO Parameter-entity replacement text must be properly nested
6457 * with parenthesized groups. That is to say, if either of the
6458 * opening or closing parentheses in a choice, seq, or Mixed
6459 * construct is contained in the replacement text for a parameter
6460 * entity, both must be contained in the same replacement text. For
6461 * interoperability, if a parameter-entity reference appears in a
6462 * choice, seq, or Mixed construct, its replacement text should not
6463 * be empty, and neither the first nor last non-blank character of
6464 * the replacement text should be a connector (| or ,).
6465 *
6466 * Returns the tree of xmlElementContentPtr describing the element
6467 * hierarchy.
6468 */
6469xmlElementContentPtr
6470xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6471 /* stub left for API/ABI compat */
6472 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6473}
6474
6475/**
Owen Taylor3473f882001-02-23 17:55:21 +00006476 * xmlParseElementContentDecl:
6477 * @ctxt: an XML parser context
6478 * @name: the name of the element being defined.
6479 * @result: the Element Content pointer will be stored here if any
6480 *
6481 * parse the declaration for an Element content either Mixed or Children,
6482 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006483 *
Owen Taylor3473f882001-02-23 17:55:21 +00006484 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6485 *
6486 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6487 */
6488
6489int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006490xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006491 xmlElementContentPtr *result) {
6492
6493 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006494 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006495 int res;
6496
6497 *result = NULL;
6498
6499 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006500 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006501 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006502 return(-1);
6503 }
6504 NEXT;
6505 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006506 if (ctxt->instate == XML_PARSER_EOF)
6507 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006508 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006509 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006510 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006511 res = XML_ELEMENT_TYPE_MIXED;
6512 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006513 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006514 res = XML_ELEMENT_TYPE_ELEMENT;
6515 }
Owen Taylor3473f882001-02-23 17:55:21 +00006516 SKIP_BLANKS;
6517 *result = tree;
6518 return(res);
6519}
6520
6521/**
6522 * xmlParseElementDecl:
6523 * @ctxt: an XML parser context
6524 *
6525 * parse an Element declaration.
6526 *
6527 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6528 *
6529 * [ VC: Unique Element Type Declaration ]
6530 * No element type may be declared more than once
6531 *
6532 * Returns the type of the element, or -1 in case of error
6533 */
6534int
6535xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006536 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006537 int ret = -1;
6538 xmlElementContentPtr content = NULL;
6539
Daniel Veillard4c778d82005-01-23 17:37:44 +00006540 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006541 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006542 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006543
6544 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006545 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006546 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6547 "Space required after 'ELEMENT'\n");
David Kilzer00906752016-01-26 16:57:03 -08006548 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006549 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00006550 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006551 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006552 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6553 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006554 return(-1);
6555 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006556 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006557 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6558 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006559 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00006560 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006561 SKIP(5);
6562 /*
6563 * Element must always be empty.
6564 */
6565 ret = XML_ELEMENT_TYPE_EMPTY;
6566 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6567 (NXT(2) == 'Y')) {
6568 SKIP(3);
6569 /*
6570 * Element is a generic container.
6571 */
6572 ret = XML_ELEMENT_TYPE_ANY;
6573 } else if (RAW == '(') {
6574 ret = xmlParseElementContentDecl(ctxt, name, &content);
6575 } else {
6576 /*
6577 * [ WFC: PEs in Internal Subset ] error handling.
6578 */
6579 if ((RAW == '%') && (ctxt->external == 0) &&
6580 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006581 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006582 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006583 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006584 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006585 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6586 }
Owen Taylor3473f882001-02-23 17:55:21 +00006587 return(-1);
6588 }
6589
6590 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006591
6592 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006593 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006594 if (content != NULL) {
6595 xmlFreeDocElementContent(ctxt->myDoc, content);
6596 }
Owen Taylor3473f882001-02-23 17:55:21 +00006597 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006598 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006599 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006600 "Element declaration doesn't start and stop in"
6601 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006602 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006603
Owen Taylor3473f882001-02-23 17:55:21 +00006604 NEXT;
6605 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006606 (ctxt->sax->elementDecl != NULL)) {
6607 if (content != NULL)
6608 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006609 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6610 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006611 if ((content != NULL) && (content->parent == NULL)) {
6612 /*
6613 * this is a trick: if xmlAddElementDecl is called,
6614 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006615 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006616 * interfaces or change the API/ABI
6617 */
6618 xmlFreeDocElementContent(ctxt->myDoc, content);
6619 }
6620 } else if (content != NULL) {
6621 xmlFreeDocElementContent(ctxt->myDoc, content);
6622 }
Owen Taylor3473f882001-02-23 17:55:21 +00006623 }
Owen Taylor3473f882001-02-23 17:55:21 +00006624 }
6625 return(ret);
6626}
6627
6628/**
Owen Taylor3473f882001-02-23 17:55:21 +00006629 * xmlParseConditionalSections
6630 * @ctxt: an XML parser context
6631 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006632 * [61] conditionalSect ::= includeSect | ignoreSect
6633 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006634 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6635 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6636 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6637 */
6638
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006639static void
Owen Taylor3473f882001-02-23 17:55:21 +00006640xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006641 int *inputIds = NULL;
6642 size_t inputIdsSize = 0;
6643 size_t depth = 0;
Daniel Veillard49d44052008-08-27 19:57:06 +00006644
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006645 while (ctxt->instate != XML_PARSER_EOF) {
6646 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6647 int id = ctxt->input->id;
6648
6649 SKIP(3);
6650 SKIP_BLANKS;
6651
6652 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6653 SKIP(7);
6654 SKIP_BLANKS;
6655 if (RAW != '[') {
6656 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6657 xmlHaltParser(ctxt);
6658 goto error;
6659 }
6660 if (ctxt->input->id != id) {
6661 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6662 "All markup of the conditional section is"
6663 " not in the same entity\n");
6664 }
6665 NEXT;
6666
6667 if (inputIdsSize <= depth) {
6668 int *tmp;
6669
6670 inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6671 tmp = (int *) xmlRealloc(inputIds,
6672 inputIdsSize * sizeof(int));
6673 if (tmp == NULL) {
6674 xmlErrMemory(ctxt, NULL);
6675 goto error;
6676 }
6677 inputIds = tmp;
6678 }
6679 inputIds[depth] = id;
6680 depth++;
6681 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6682 int state;
6683 xmlParserInputState instate;
6684 size_t ignoreDepth = 0;
6685
6686 SKIP(6);
6687 SKIP_BLANKS;
6688 if (RAW != '[') {
6689 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6690 xmlHaltParser(ctxt);
6691 goto error;
6692 }
6693 if (ctxt->input->id != id) {
6694 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6695 "All markup of the conditional section is"
6696 " not in the same entity\n");
6697 }
6698 NEXT;
6699
6700 /*
6701 * Parse up to the end of the conditional section but disable
6702 * SAX event generating DTD building in the meantime
6703 */
6704 state = ctxt->disableSAX;
6705 instate = ctxt->instate;
6706 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6707 ctxt->instate = XML_PARSER_IGNORE;
6708
6709 while (RAW != 0) {
6710 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6711 SKIP(3);
6712 ignoreDepth++;
6713 /* Check for integer overflow */
6714 if (ignoreDepth == 0) {
6715 xmlErrMemory(ctxt, NULL);
6716 goto error;
6717 }
6718 } else if ((RAW == ']') && (NXT(1) == ']') &&
6719 (NXT(2) == '>')) {
6720 if (ignoreDepth == 0)
6721 break;
6722 SKIP(3);
6723 ignoreDepth--;
6724 } else {
6725 NEXT;
6726 }
6727 }
6728
6729 ctxt->disableSAX = state;
6730 ctxt->instate = instate;
6731
6732 if (RAW == 0) {
6733 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6734 goto error;
6735 }
6736 if (ctxt->input->id != id) {
6737 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6738 "All markup of the conditional section is"
6739 " not in the same entity\n");
6740 }
6741 SKIP(3);
6742 } else {
6743 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6744 xmlHaltParser(ctxt);
6745 goto error;
6746 }
6747 } else if ((depth > 0) &&
6748 (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6749 depth--;
6750 if (ctxt->input->id != inputIds[depth]) {
6751 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6752 "All markup of the conditional section is not"
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006753 " in the same entity\n");
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006754 }
6755 SKIP(3);
6756 } else {
6757 const xmlChar *check = CUR_PTR;
6758 unsigned int cons = ctxt->input->consumed;
6759
6760 xmlParseMarkupDecl(ctxt);
6761
6762 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6763 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6764 xmlHaltParser(ctxt);
6765 goto error;
6766 }
6767 }
6768
6769 if (depth == 0)
6770 break;
Owen Taylor3473f882001-02-23 17:55:21 +00006771
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006772 SKIP_BLANKS;
6773 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006774 }
6775
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006776error:
6777 xmlFree(inputIds);
Owen Taylor3473f882001-02-23 17:55:21 +00006778}
6779
6780/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006781 * xmlParseMarkupDecl:
6782 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006783 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006784 * parse Markup declarations
6785 *
6786 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6787 * NotationDecl | PI | Comment
6788 *
6789 * [ VC: Proper Declaration/PE Nesting ]
6790 * Parameter-entity replacement text must be properly nested with
6791 * markup declarations. That is to say, if either the first character
6792 * or the last character of a markup declaration (markupdecl above) is
6793 * contained in the replacement text for a parameter-entity reference,
6794 * both must be contained in the same replacement text.
6795 *
6796 * [ WFC: PEs in Internal Subset ]
6797 * In the internal DTD subset, parameter-entity references can occur
6798 * only where markup declarations can occur, not within markup declarations.
6799 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006800 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006801 */
6802void
6803xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6804 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006805 if (CUR == '<') {
6806 if (NXT(1) == '!') {
6807 switch (NXT(2)) {
6808 case 'E':
6809 if (NXT(3) == 'L')
6810 xmlParseElementDecl(ctxt);
6811 else if (NXT(3) == 'N')
6812 xmlParseEntityDecl(ctxt);
6813 break;
6814 case 'A':
6815 xmlParseAttributeListDecl(ctxt);
6816 break;
6817 case 'N':
6818 xmlParseNotationDecl(ctxt);
6819 break;
6820 case '-':
6821 xmlParseComment(ctxt);
6822 break;
6823 default:
6824 /* there is an error but it will be detected later */
6825 break;
6826 }
6827 } else if (NXT(1) == '?') {
6828 xmlParsePI(ctxt);
6829 }
6830 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006831
6832 /*
6833 * detect requirement to exit there and act accordingly
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006834 * and avoid having instate overridden later on
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006835 */
6836 if (ctxt->instate == XML_PARSER_EOF)
6837 return;
6838
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006839 ctxt->instate = XML_PARSER_DTD;
6840}
6841
6842/**
6843 * xmlParseTextDecl:
6844 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006845 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006846 * parse an XML declaration header for external entities
6847 *
6848 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006849 */
6850
6851void
6852xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6853 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006854 const xmlChar *encoding;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006855 int oldstate;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006856
6857 /*
6858 * We know that '<?xml' is here.
6859 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006860 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006861 SKIP(5);
6862 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006863 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006864 return;
6865 }
6866
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006867 /* Avoid expansion of parameter entities when skipping blanks. */
6868 oldstate = ctxt->instate;
6869 ctxt->instate = XML_PARSER_START;
6870
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006871 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006872 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6873 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006874 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006875
6876 /*
6877 * We may have the VersionInfo here.
6878 */
6879 version = xmlParseVersionInfo(ctxt);
6880 if (version == NULL)
6881 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006882 else {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006883 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006884 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6885 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006886 }
6887 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006888 ctxt->input->version = version;
6889
6890 /*
6891 * We must have the encoding declaration
6892 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006893 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006894 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6895 /*
6896 * The XML REC instructs us to stop parsing right here
6897 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006898 ctxt->instate = oldstate;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006899 return;
6900 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006901 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6902 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6903 "Missing encoding in text declaration\n");
6904 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006905
6906 SKIP_BLANKS;
6907 if ((RAW == '?') && (NXT(1) == '>')) {
6908 SKIP(2);
6909 } else if (RAW == '>') {
6910 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006911 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006912 NEXT;
6913 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006914 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006915 MOVETO_ENDTAG(CUR_PTR);
6916 NEXT;
6917 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07006918
6919 ctxt->instate = oldstate;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006920}
6921
6922/**
Owen Taylor3473f882001-02-23 17:55:21 +00006923 * xmlParseExternalSubset:
6924 * @ctxt: an XML parser context
6925 * @ExternalID: the external identifier
6926 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006927 *
Owen Taylor3473f882001-02-23 17:55:21 +00006928 * parse Markup declarations from an external subset
6929 *
6930 * [30] extSubset ::= textDecl? extSubsetDecl
6931 *
6932 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6933 */
6934void
6935xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6936 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006937 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006938 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006939
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006940 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006941 (ctxt->input->end - ctxt->input->cur >= 4)) {
6942 xmlChar start[4];
6943 xmlCharEncoding enc;
6944
6945 start[0] = RAW;
6946 start[1] = NXT(1);
6947 start[2] = NXT(2);
6948 start[3] = NXT(3);
6949 enc = xmlDetectCharEncoding(start, 4);
6950 if (enc != XML_CHAR_ENCODING_NONE)
6951 xmlSwitchEncoding(ctxt, enc);
6952 }
6953
Daniel Veillarda07050d2003-10-19 14:46:32 +00006954 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006955 xmlParseTextDecl(ctxt);
6956 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6957 /*
6958 * The XML REC instructs us to stop parsing right here
6959 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08006960 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006961 return;
6962 }
6963 }
6964 if (ctxt->myDoc == NULL) {
6965 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006966 if (ctxt->myDoc == NULL) {
6967 xmlErrMemory(ctxt, "New Doc failed");
6968 return;
6969 }
6970 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006971 }
6972 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6973 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6974
6975 ctxt->instate = XML_PARSER_DTD;
6976 ctxt->external = 1;
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006977 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006978 while (((RAW == '<') && (NXT(1) == '?')) ||
6979 ((RAW == '<') && (NXT(1) == '!')) ||
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006980 (RAW == '%')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006981 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006982 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006983
6984 GROW;
6985 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6986 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006987 } else
6988 xmlParseMarkupDecl(ctxt);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006989 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006990
Daniel Veillardfdc91562002-07-01 21:52:03 +00006991 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006992 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006993 break;
6994 }
6995 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006996
Owen Taylor3473f882001-02-23 17:55:21 +00006997 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006998 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006999 }
7000
7001}
7002
7003/**
7004 * xmlParseReference:
7005 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007006 *
Owen Taylor3473f882001-02-23 17:55:21 +00007007 * parse and handle entity references in content, depending on the SAX
7008 * interface, this may end-up in a call to character() if this is a
7009 * CharRef, a predefined entity, if there is no reference() callback.
7010 * or if the parser was asked to switch to that mode.
7011 *
7012 * [67] Reference ::= EntityRef | CharRef
7013 */
7014void
7015xmlParseReference(xmlParserCtxtPtr ctxt) {
7016 xmlEntityPtr ent;
7017 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007018 int was_checked;
7019 xmlNodePtr list = NULL;
7020 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007021
Daniel Veillard0161e632008-08-28 15:36:32 +00007022
7023 if (RAW != '&')
7024 return;
7025
7026 /*
7027 * Simple case of a CharRef
7028 */
Owen Taylor3473f882001-02-23 17:55:21 +00007029 if (NXT(1) == '#') {
7030 int i = 0;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007031 xmlChar out[16];
Owen Taylor3473f882001-02-23 17:55:21 +00007032 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007033 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007034
Daniel Veillarddc171602008-03-26 17:41:38 +00007035 if (value == 0)
7036 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007037 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7038 /*
7039 * So we are using non-UTF-8 buffers
7040 * Check that the char fit on 8bits, if not
7041 * generate a CharRef.
7042 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007043 if (value <= 0xFF) {
7044 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007045 out[1] = 0;
7046 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7047 (!ctxt->disableSAX))
7048 ctxt->sax->characters(ctxt->userData, out, 1);
7049 } else {
7050 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007051 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007052 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007053 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007054 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7055 (!ctxt->disableSAX))
7056 ctxt->sax->reference(ctxt->userData, out);
7057 }
7058 } else {
7059 /*
7060 * Just encode the value in UTF-8
7061 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007062 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007063 out[i] = 0;
7064 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7065 (!ctxt->disableSAX))
7066 ctxt->sax->characters(ctxt->userData, out, i);
7067 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007068 return;
7069 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007070
Daniel Veillard0161e632008-08-28 15:36:32 +00007071 /*
7072 * We are seeing an entity reference
7073 */
7074 ent = xmlParseEntityRef(ctxt);
7075 if (ent == NULL) return;
7076 if (!ctxt->wellFormed)
7077 return;
7078 was_checked = ent->checked;
7079
7080 /* special case of predefined entities */
7081 if ((ent->name == NULL) ||
7082 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7083 val = ent->content;
7084 if (val == NULL) return;
7085 /*
7086 * inline the entity.
7087 */
7088 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7089 (!ctxt->disableSAX))
7090 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7091 return;
7092 }
7093
7094 /*
7095 * The first reference to the entity trigger a parsing phase
7096 * where the ent->children is filled with the result from
7097 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007098 * Note: external parsed entities will not be loaded, it is not
7099 * required for a non-validating parser, unless the parsing option
7100 * of validating, or substituting entities were given. Doing so is
7101 * far more secure as the parser will only process data coming from
7102 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007103 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007104 if (((ent->checked == 0) ||
7105 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007106 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7107 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007108 unsigned long oldnbent = ctxt->nbentities, diff;
Daniel Veillard0161e632008-08-28 15:36:32 +00007109
7110 /*
7111 * This is a bit hackish but this seems the best
7112 * way to make sure both SAX and DOM entity support
7113 * behaves okay.
7114 */
7115 void *user_data;
7116 if (ctxt->userData == ctxt)
7117 user_data = NULL;
7118 else
7119 user_data = ctxt->userData;
7120
7121 /*
7122 * Check that this entity is well formed
7123 * 4.3.2: An internal general parsed entity is well-formed
7124 * if its replacement text matches the production labeled
7125 * content.
7126 */
7127 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7128 ctxt->depth++;
7129 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7130 user_data, &list);
7131 ctxt->depth--;
7132
7133 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7134 ctxt->depth++;
7135 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7136 user_data, ctxt->depth, ent->URI,
7137 ent->ExternalID, &list);
7138 ctxt->depth--;
7139 } else {
7140 ret = XML_ERR_ENTITY_PE_INTERNAL;
7141 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7142 "invalid entity type found\n", NULL);
7143 }
7144
7145 /*
7146 * Store the number of entities needing parsing for this entity
7147 * content and do checkings
7148 */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007149 diff = ctxt->nbentities - oldnbent + 1;
7150 if (diff > INT_MAX / 2)
7151 diff = INT_MAX / 2;
7152 ent->checked = diff * 2;
Daniel Veillardcff25462013-03-11 15:57:55 +08007153 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7154 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007155 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007156 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007157 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007158 return;
7159 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007160 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007161 xmlFreeNodeList(list);
7162 return;
7163 }
Owen Taylor3473f882001-02-23 17:55:21 +00007164
Daniel Veillard0161e632008-08-28 15:36:32 +00007165 if ((ret == XML_ERR_OK) && (list != NULL)) {
7166 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7167 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7168 (ent->children == NULL)) {
7169 ent->children = list;
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007170 /*
7171 * Prune it directly in the generated document
7172 * except for single text nodes.
7173 */
7174 if ((ctxt->replaceEntities == 0) ||
7175 (ctxt->parseMode == XML_PARSE_READER) ||
7176 ((list->type == XML_TEXT_NODE) &&
7177 (list->next == NULL))) {
7178 ent->owner = 1;
7179 while (list != NULL) {
7180 list->parent = (xmlNodePtr) ent;
7181 xmlSetTreeDoc(list, ent->doc);
7182 if (list->next == NULL)
7183 ent->last = list;
7184 list = list->next;
7185 }
7186 list = NULL;
7187 } else {
7188 ent->owner = 0;
7189 while (list != NULL) {
7190 list->parent = (xmlNodePtr) ctxt->node;
7191 list->doc = ctxt->myDoc;
7192 if (list->next == NULL)
7193 ent->last = list;
7194 list = list->next;
7195 }
7196 list = ent->children;
Daniel Veillard0161e632008-08-28 15:36:32 +00007197#ifdef LIBXML_LEGACY_ENABLED
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007198 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7199 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007200#endif /* LIBXML_LEGACY_ENABLED */
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007201 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007202 } else {
7203 xmlFreeNodeList(list);
7204 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007205 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007206 } else if ((ret != XML_ERR_OK) &&
7207 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7208 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7209 "Entity '%s' failed to parse\n", ent->name);
Nick Wellnhofer60dded12018-01-22 15:04:58 +01007210 if (ent->content != NULL)
7211 ent->content[0] = 0;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007212 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007213 } else if (list != NULL) {
7214 xmlFreeNodeList(list);
7215 list = NULL;
7216 }
7217 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007218 ent->checked = 2;
David Kilzer3f0627a2017-06-16 21:30:42 +02007219
7220 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7221 was_checked = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007222 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007223 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007224 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007225
Daniel Veillard0161e632008-08-28 15:36:32 +00007226 /*
7227 * Now that the entity content has been gathered
7228 * provide it to the application, this can take different forms based
7229 * on the parsing modes.
7230 */
7231 if (ent->children == NULL) {
7232 /*
7233 * Probably running in SAX mode and the callbacks don't
7234 * build the entity content. So unless we already went
7235 * though parsing for first checking go though the entity
7236 * content to generate callbacks associated to the entity
7237 */
7238 if (was_checked != 0) {
7239 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007240 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007241 * This is a bit hackish but this seems the best
7242 * way to make sure both SAX and DOM entity support
7243 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007244 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007245 if (ctxt->userData == ctxt)
7246 user_data = NULL;
7247 else
7248 user_data = ctxt->userData;
7249
7250 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7251 ctxt->depth++;
7252 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7253 ent->content, user_data, NULL);
7254 ctxt->depth--;
7255 } else if (ent->etype ==
7256 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7257 ctxt->depth++;
7258 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7259 ctxt->sax, user_data, ctxt->depth,
7260 ent->URI, ent->ExternalID, NULL);
7261 ctxt->depth--;
7262 } else {
7263 ret = XML_ERR_ENTITY_PE_INTERNAL;
7264 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7265 "invalid entity type found\n", NULL);
7266 }
7267 if (ret == XML_ERR_ENTITY_LOOP) {
7268 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7269 return;
7270 }
7271 }
7272 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7273 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7274 /*
7275 * Entity reference callback comes second, it's somewhat
7276 * superfluous but a compatibility to historical behaviour
7277 */
7278 ctxt->sax->reference(ctxt->userData, ent->name);
7279 }
7280 return;
7281 }
7282
7283 /*
7284 * If we didn't get any children for the entity being built
7285 */
7286 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7287 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7288 /*
7289 * Create a node.
7290 */
7291 ctxt->sax->reference(ctxt->userData, ent->name);
7292 return;
7293 }
7294
7295 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7296 /*
7297 * There is a problem on the handling of _private for entities
7298 * (bug 155816): Should we copy the content of the field from
7299 * the entity (possibly overwriting some value set by the user
7300 * when a copy is created), should we leave it alone, or should
7301 * we try to take care of different situations? The problem
7302 * is exacerbated by the usage of this field by the xmlReader.
7303 * To fix this bug, we look at _private on the created node
7304 * and, if it's NULL, we copy in whatever was in the entity.
7305 * If it's not NULL we leave it alone. This is somewhat of a
7306 * hack - maybe we should have further tests to determine
7307 * what to do.
7308 */
7309 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7310 /*
7311 * Seems we are generating the DOM content, do
7312 * a simple tree copy for all references except the first
7313 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007314 */
7315 if (((list == NULL) && (ent->owner == 0)) ||
7316 (ctxt->parseMode == XML_PARSE_READER)) {
7317 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7318
7319 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007320 * We are copying here, make sure there is no abuse
7321 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007322 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007323 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7324 return;
7325
7326 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007327 * when operating on a reader, the entities definitions
7328 * are always owning the entities subtree.
7329 if (ctxt->parseMode == XML_PARSE_READER)
7330 ent->owner = 1;
7331 */
7332
7333 cur = ent->children;
7334 while (cur != NULL) {
7335 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7336 if (nw != NULL) {
7337 if (nw->_private == NULL)
7338 nw->_private = cur->_private;
7339 if (firstChild == NULL){
7340 firstChild = nw;
7341 }
7342 nw = xmlAddChild(ctxt->node, nw);
7343 }
7344 if (cur == ent->last) {
7345 /*
7346 * needed to detect some strange empty
7347 * node cases in the reader tests
7348 */
7349 if ((ctxt->parseMode == XML_PARSE_READER) &&
7350 (nw != NULL) &&
7351 (nw->type == XML_ELEMENT_NODE) &&
7352 (nw->children == NULL))
7353 nw->extra = 1;
7354
7355 break;
7356 }
7357 cur = cur->next;
7358 }
7359#ifdef LIBXML_LEGACY_ENABLED
7360 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7361 xmlAddEntityReference(ent, firstChild, nw);
7362#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007363 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007364 xmlNodePtr nw = NULL, cur, next, last,
7365 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007366
7367 /*
7368 * We are copying here, make sure there is no abuse
7369 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007370 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007371 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7372 return;
7373
Daniel Veillard0161e632008-08-28 15:36:32 +00007374 /*
7375 * Copy the entity child list and make it the new
7376 * entity child list. The goal is to make sure any
7377 * ID or REF referenced will be the one from the
7378 * document content and not the entity copy.
7379 */
7380 cur = ent->children;
7381 ent->children = NULL;
7382 last = ent->last;
7383 ent->last = NULL;
7384 while (cur != NULL) {
7385 next = cur->next;
7386 cur->next = NULL;
7387 cur->parent = NULL;
7388 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7389 if (nw != NULL) {
7390 if (nw->_private == NULL)
7391 nw->_private = cur->_private;
7392 if (firstChild == NULL){
7393 firstChild = cur;
7394 }
7395 xmlAddChild((xmlNodePtr) ent, nw);
7396 xmlAddChild(ctxt->node, cur);
7397 }
7398 if (cur == last)
7399 break;
7400 cur = next;
7401 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007402 if (ent->owner == 0)
7403 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007404#ifdef LIBXML_LEGACY_ENABLED
7405 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7406 xmlAddEntityReference(ent, firstChild, nw);
7407#endif /* LIBXML_LEGACY_ENABLED */
7408 } else {
7409 const xmlChar *nbktext;
7410
7411 /*
7412 * the name change is to avoid coalescing of the
7413 * node with a possible previous text one which
7414 * would make ent->children a dangling pointer
7415 */
7416 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7417 -1);
7418 if (ent->children->type == XML_TEXT_NODE)
7419 ent->children->name = nbktext;
7420 if ((ent->last != ent->children) &&
7421 (ent->last->type == XML_TEXT_NODE))
7422 ent->last->name = nbktext;
7423 xmlAddChildList(ctxt->node, ent->children);
7424 }
7425
7426 /*
7427 * This is to avoid a nasty side effect, see
7428 * characters() in SAX.c
7429 */
7430 ctxt->nodemem = 0;
7431 ctxt->nodelen = 0;
7432 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007433 }
7434 }
7435}
7436
7437/**
7438 * xmlParseEntityRef:
7439 * @ctxt: an XML parser context
7440 *
7441 * parse ENTITY references declarations
7442 *
7443 * [68] EntityRef ::= '&' Name ';'
7444 *
7445 * [ WFC: Entity Declared ]
7446 * In a document without any DTD, a document with only an internal DTD
7447 * subset which contains no parameter entity references, or a document
7448 * with "standalone='yes'", the Name given in the entity reference
7449 * must match that in an entity declaration, except that well-formed
7450 * documents need not declare any of the following entities: amp, lt,
7451 * gt, apos, quot. The declaration of a parameter entity must precede
7452 * any reference to it. Similarly, the declaration of a general entity
7453 * must precede any reference to it which appears in a default value in an
7454 * attribute-list declaration. Note that if entities are declared in the
7455 * external subset or in external parameter entities, a non-validating
7456 * processor is not obligated to read and process their declarations;
7457 * for such documents, the rule that an entity must be declared is a
7458 * well-formedness constraint only if standalone='yes'.
7459 *
7460 * [ WFC: Parsed Entity ]
7461 * An entity reference must not contain the name of an unparsed entity
7462 *
7463 * Returns the xmlEntityPtr if found, or NULL otherwise.
7464 */
7465xmlEntityPtr
7466xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007467 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007468 xmlEntityPtr ent = NULL;
7469
7470 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007471 if (ctxt->instate == XML_PARSER_EOF)
7472 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007473
Daniel Veillard0161e632008-08-28 15:36:32 +00007474 if (RAW != '&')
7475 return(NULL);
7476 NEXT;
7477 name = xmlParseName(ctxt);
7478 if (name == NULL) {
7479 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7480 "xmlParseEntityRef: no name\n");
7481 return(NULL);
7482 }
7483 if (RAW != ';') {
7484 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7485 return(NULL);
7486 }
7487 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007488
Daniel Veillard0161e632008-08-28 15:36:32 +00007489 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007490 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007491 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007492 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7493 ent = xmlGetPredefinedEntity(name);
7494 if (ent != NULL)
7495 return(ent);
7496 }
Owen Taylor3473f882001-02-23 17:55:21 +00007497
Daniel Veillard0161e632008-08-28 15:36:32 +00007498 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007499 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007500 */
7501 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007502
Daniel Veillard0161e632008-08-28 15:36:32 +00007503 /*
7504 * Ask first SAX for entity resolution, otherwise try the
7505 * entities which may have stored in the parser context.
7506 */
7507 if (ctxt->sax != NULL) {
7508 if (ctxt->sax->getEntity != NULL)
7509 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007510 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007511 (ctxt->options & XML_PARSE_OLDSAX))
7512 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007513 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7514 (ctxt->userData==ctxt)) {
7515 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007516 }
7517 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007518 if (ctxt->instate == XML_PARSER_EOF)
7519 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007520 /*
7521 * [ WFC: Entity Declared ]
7522 * In a document without any DTD, a document with only an
7523 * internal DTD subset which contains no parameter entity
7524 * references, or a document with "standalone='yes'", the
7525 * Name given in the entity reference must match that in an
7526 * entity declaration, except that well-formed documents
7527 * need not declare any of the following entities: amp, lt,
7528 * gt, apos, quot.
7529 * The declaration of a parameter entity must precede any
7530 * reference to it.
7531 * Similarly, the declaration of a general entity must
7532 * precede any reference to it which appears in a default
7533 * value in an attribute-list declaration. Note that if
7534 * entities are declared in the external subset or in
7535 * external parameter entities, a non-validating processor
7536 * is not obligated to read and process their declarations;
7537 * for such documents, the rule that an entity must be
7538 * declared is a well-formedness constraint only if
7539 * standalone='yes'.
7540 */
7541 if (ent == NULL) {
7542 if ((ctxt->standalone == 1) ||
7543 ((ctxt->hasExternalSubset == 0) &&
7544 (ctxt->hasPErefs == 0))) {
7545 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7546 "Entity '%s' not defined\n", name);
7547 } else {
7548 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7549 "Entity '%s' not defined\n", name);
7550 if ((ctxt->inSubset == 0) &&
7551 (ctxt->sax != NULL) &&
7552 (ctxt->sax->reference != NULL)) {
7553 ctxt->sax->reference(ctxt->userData, name);
7554 }
7555 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007556 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007557 ctxt->valid = 0;
7558 }
7559
7560 /*
7561 * [ WFC: Parsed Entity ]
7562 * An entity reference must not contain the name of an
7563 * unparsed entity
7564 */
7565 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7566 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7567 "Entity reference to unparsed entity %s\n", name);
7568 }
7569
7570 /*
7571 * [ WFC: No External Entity References ]
7572 * Attribute values cannot contain direct or indirect
7573 * entity references to external entities.
7574 */
7575 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7576 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7577 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7578 "Attribute references external entity '%s'\n", name);
7579 }
7580 /*
7581 * [ WFC: No < in Attribute Values ]
7582 * The replacement text of any entity referred to directly or
7583 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007584 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007585 */
7586 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007587 (ent != NULL) &&
7588 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007589 if (((ent->checked & 1) || (ent->checked == 0)) &&
7590 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007591 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7592 "'<' in entity '%s' is not allowed in attributes values\n", name);
7593 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007594 }
7595
7596 /*
7597 * Internal check, no parameter entities here ...
7598 */
7599 else {
7600 switch (ent->etype) {
7601 case XML_INTERNAL_PARAMETER_ENTITY:
7602 case XML_EXTERNAL_PARAMETER_ENTITY:
7603 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7604 "Attempt to reference the parameter entity '%s'\n",
7605 name);
7606 break;
7607 default:
7608 break;
7609 }
7610 }
7611
7612 /*
7613 * [ WFC: No Recursion ]
7614 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007615 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007616 * Done somewhere else
7617 */
Owen Taylor3473f882001-02-23 17:55:21 +00007618 return(ent);
7619}
7620
7621/**
7622 * xmlParseStringEntityRef:
7623 * @ctxt: an XML parser context
7624 * @str: a pointer to an index in the string
7625 *
7626 * parse ENTITY references declarations, but this version parses it from
7627 * a string value.
7628 *
7629 * [68] EntityRef ::= '&' Name ';'
7630 *
7631 * [ WFC: Entity Declared ]
7632 * In a document without any DTD, a document with only an internal DTD
7633 * subset which contains no parameter entity references, or a document
7634 * with "standalone='yes'", the Name given in the entity reference
7635 * must match that in an entity declaration, except that well-formed
7636 * documents need not declare any of the following entities: amp, lt,
7637 * gt, apos, quot. The declaration of a parameter entity must precede
7638 * any reference to it. Similarly, the declaration of a general entity
7639 * must precede any reference to it which appears in a default value in an
7640 * attribute-list declaration. Note that if entities are declared in the
7641 * external subset or in external parameter entities, a non-validating
7642 * processor is not obligated to read and process their declarations;
7643 * for such documents, the rule that an entity must be declared is a
7644 * well-formedness constraint only if standalone='yes'.
7645 *
7646 * [ WFC: Parsed Entity ]
7647 * An entity reference must not contain the name of an unparsed entity
7648 *
7649 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7650 * is updated to the current location in the string.
7651 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007652static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007653xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7654 xmlChar *name;
7655 const xmlChar *ptr;
7656 xmlChar cur;
7657 xmlEntityPtr ent = NULL;
7658
7659 if ((str == NULL) || (*str == NULL))
7660 return(NULL);
7661 ptr = *str;
7662 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007663 if (cur != '&')
7664 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007665
Daniel Veillard0161e632008-08-28 15:36:32 +00007666 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007667 name = xmlParseStringName(ctxt, &ptr);
7668 if (name == NULL) {
7669 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7670 "xmlParseStringEntityRef: no name\n");
7671 *str = ptr;
7672 return(NULL);
7673 }
7674 if (*ptr != ';') {
7675 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007676 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007677 *str = ptr;
7678 return(NULL);
7679 }
7680 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007681
Owen Taylor3473f882001-02-23 17:55:21 +00007682
Daniel Veillard0161e632008-08-28 15:36:32 +00007683 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007684 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007685 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007686 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7687 ent = xmlGetPredefinedEntity(name);
7688 if (ent != NULL) {
7689 xmlFree(name);
7690 *str = ptr;
7691 return(ent);
7692 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007693 }
Owen Taylor3473f882001-02-23 17:55:21 +00007694
Daniel Veillard0161e632008-08-28 15:36:32 +00007695 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007696 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007697 */
7698 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007699
Daniel Veillard0161e632008-08-28 15:36:32 +00007700 /*
7701 * Ask first SAX for entity resolution, otherwise try the
7702 * entities which may have stored in the parser context.
7703 */
7704 if (ctxt->sax != NULL) {
7705 if (ctxt->sax->getEntity != NULL)
7706 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007707 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7708 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007709 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7710 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007711 }
7712 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007713 if (ctxt->instate == XML_PARSER_EOF) {
7714 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007715 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007716 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007717
7718 /*
7719 * [ WFC: Entity Declared ]
7720 * In a document without any DTD, a document with only an
7721 * internal DTD subset which contains no parameter entity
7722 * references, or a document with "standalone='yes'", the
7723 * Name given in the entity reference must match that in an
7724 * entity declaration, except that well-formed documents
7725 * need not declare any of the following entities: amp, lt,
7726 * gt, apos, quot.
7727 * The declaration of a parameter entity must precede any
7728 * reference to it.
7729 * Similarly, the declaration of a general entity must
7730 * precede any reference to it which appears in a default
7731 * value in an attribute-list declaration. Note that if
7732 * entities are declared in the external subset or in
7733 * external parameter entities, a non-validating processor
7734 * is not obligated to read and process their declarations;
7735 * for such documents, the rule that an entity must be
7736 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007737 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007738 */
7739 if (ent == NULL) {
7740 if ((ctxt->standalone == 1) ||
7741 ((ctxt->hasExternalSubset == 0) &&
7742 (ctxt->hasPErefs == 0))) {
7743 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7744 "Entity '%s' not defined\n", name);
7745 } else {
7746 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7747 "Entity '%s' not defined\n",
7748 name);
7749 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007750 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007751 /* TODO ? check regressions ctxt->valid = 0; */
7752 }
7753
7754 /*
7755 * [ WFC: Parsed Entity ]
7756 * An entity reference must not contain the name of an
7757 * unparsed entity
7758 */
7759 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7760 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7761 "Entity reference to unparsed entity %s\n", name);
7762 }
7763
7764 /*
7765 * [ WFC: No External Entity References ]
7766 * Attribute values cannot contain direct or indirect
7767 * entity references to external entities.
7768 */
7769 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7770 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7771 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7772 "Attribute references external entity '%s'\n", name);
7773 }
7774 /*
7775 * [ WFC: No < in Attribute Values ]
7776 * The replacement text of any entity referred to directly or
7777 * indirectly in an attribute value (other than "&lt;") must
7778 * not contain a <.
7779 */
7780 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7781 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007782 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007783 (xmlStrchr(ent->content, '<'))) {
7784 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7785 "'<' in entity '%s' is not allowed in attributes values\n",
7786 name);
7787 }
7788
7789 /*
7790 * Internal check, no parameter entities here ...
7791 */
7792 else {
7793 switch (ent->etype) {
7794 case XML_INTERNAL_PARAMETER_ENTITY:
7795 case XML_EXTERNAL_PARAMETER_ENTITY:
7796 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7797 "Attempt to reference the parameter entity '%s'\n",
7798 name);
7799 break;
7800 default:
7801 break;
7802 }
7803 }
7804
7805 /*
7806 * [ WFC: No Recursion ]
7807 * A parsed entity must not contain a recursive reference
7808 * to itself, either directly or indirectly.
7809 * Done somewhere else
7810 */
7811
7812 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007813 *str = ptr;
7814 return(ent);
7815}
7816
7817/**
7818 * xmlParsePEReference:
7819 * @ctxt: an XML parser context
7820 *
7821 * parse PEReference declarations
7822 * The entity content is handled directly by pushing it's content as
7823 * a new input stream.
7824 *
7825 * [69] PEReference ::= '%' Name ';'
7826 *
7827 * [ WFC: No Recursion ]
7828 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007829 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007830 *
7831 * [ WFC: Entity Declared ]
7832 * In a document without any DTD, a document with only an internal DTD
7833 * subset which contains no parameter entity references, or a document
7834 * with "standalone='yes'", ... ... The declaration of a parameter
7835 * entity must precede any reference to it...
7836 *
7837 * [ VC: Entity Declared ]
7838 * In a document with an external subset or external parameter entities
7839 * with "standalone='no'", ... ... The declaration of a parameter entity
7840 * must precede any reference to it...
7841 *
7842 * [ WFC: In DTD ]
7843 * Parameter-entity references may only appear in the DTD.
7844 * NOTE: misleading but this is handled.
7845 */
7846void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007847xmlParsePEReference(xmlParserCtxtPtr ctxt)
7848{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007849 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007850 xmlEntityPtr entity = NULL;
7851 xmlParserInputPtr input;
7852
Daniel Veillard0161e632008-08-28 15:36:32 +00007853 if (RAW != '%')
7854 return;
7855 NEXT;
7856 name = xmlParseName(ctxt);
7857 if (name == NULL) {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007858 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
Daniel Veillard0161e632008-08-28 15:36:32 +00007859 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007860 }
Nick Wellnhofer03904152017-06-05 21:16:00 +02007861 if (xmlParserDebugEntities)
7862 xmlGenericError(xmlGenericErrorContext,
7863 "PEReference: %s\n", name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007864 if (RAW != ';') {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007865 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007866 return;
7867 }
7868
7869 NEXT;
7870
7871 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -07007872 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007873 */
7874 ctxt->nbentities++;
7875
7876 /*
7877 * Request the entity from SAX
7878 */
7879 if ((ctxt->sax != NULL) &&
7880 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007881 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7882 if (ctxt->instate == XML_PARSER_EOF)
7883 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007884 if (entity == NULL) {
7885 /*
7886 * [ WFC: Entity Declared ]
7887 * In a document without any DTD, a document with only an
7888 * internal DTD subset which contains no parameter entity
7889 * references, or a document with "standalone='yes'", ...
7890 * ... The declaration of a parameter entity must precede
7891 * any reference to it...
7892 */
7893 if ((ctxt->standalone == 1) ||
7894 ((ctxt->hasExternalSubset == 0) &&
7895 (ctxt->hasPErefs == 0))) {
7896 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7897 "PEReference: %%%s; not found\n",
7898 name);
7899 } else {
7900 /*
7901 * [ VC: Entity Declared ]
7902 * In a document with an external subset or external
7903 * parameter entities with "standalone='no'", ...
7904 * ... The declaration of a parameter entity must
7905 * precede any reference to it...
7906 */
Nick Wellnhofer03904152017-06-05 21:16:00 +02007907 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7908 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7909 "PEReference: %%%s; not found\n",
7910 name, NULL);
7911 } else
7912 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7913 "PEReference: %%%s; not found\n",
7914 name, NULL);
7915 ctxt->valid = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007916 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007917 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007918 } else {
7919 /*
7920 * Internal checking in case the entity quest barfed
7921 */
7922 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7923 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7924 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7925 "Internal: %%%s; is not a parameter entity\n",
7926 name, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007927 } else {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007928 xmlChar start[4];
7929 xmlCharEncoding enc;
7930
Neel Mehta90ccb582017-04-07 17:43:02 +02007931 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7932 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7933 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7934 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7935 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7936 (ctxt->replaceEntities == 0) &&
7937 (ctxt->validate == 0))
7938 return;
7939
Daniel Veillard0161e632008-08-28 15:36:32 +00007940 input = xmlNewEntityInputStream(ctxt, entity);
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02007941 if (xmlPushInput(ctxt, input) < 0) {
7942 xmlFreeInputStream(input);
Daniel Veillard0161e632008-08-28 15:36:32 +00007943 return;
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02007944 }
Nick Wellnhofer46dc9892017-06-08 02:24:56 +02007945
7946 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7947 /*
7948 * Get the 4 first bytes and decode the charset
7949 * if enc != XML_CHAR_ENCODING_NONE
7950 * plug some encoding conversion routines.
7951 * Note that, since we may have some non-UTF8
7952 * encoding (like UTF16, bug 135229), the 'length'
7953 * is not known, but we can calculate based upon
7954 * the amount of data in the buffer.
7955 */
7956 GROW
7957 if (ctxt->instate == XML_PARSER_EOF)
7958 return;
7959 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7960 start[0] = RAW;
7961 start[1] = NXT(1);
7962 start[2] = NXT(2);
7963 start[3] = NXT(3);
7964 enc = xmlDetectCharEncoding(start, 4);
7965 if (enc != XML_CHAR_ENCODING_NONE) {
7966 xmlSwitchEncoding(ctxt, enc);
7967 }
7968 }
7969
7970 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7971 (IS_BLANK_CH(NXT(5)))) {
7972 xmlParseTextDecl(ctxt);
Nick Wellnhofer03904152017-06-05 21:16:00 +02007973 }
7974 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007975 }
7976 }
7977 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007978}
7979
7980/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007981 * xmlLoadEntityContent:
7982 * @ctxt: an XML parser context
7983 * @entity: an unloaded system entity
7984 *
7985 * Load the original content of the given system entity from the
7986 * ExternalID/SystemID given. This is to be used for Included in Literal
7987 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7988 *
7989 * Returns 0 in case of success and -1 in case of failure
7990 */
7991static int
7992xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7993 xmlParserInputPtr input;
7994 xmlBufferPtr buf;
7995 int l, c;
7996 int count = 0;
7997
7998 if ((ctxt == NULL) || (entity == NULL) ||
7999 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8000 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8001 (entity->content != NULL)) {
8002 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8003 "xmlLoadEntityContent parameter error");
8004 return(-1);
8005 }
8006
8007 if (xmlParserDebugEntities)
8008 xmlGenericError(xmlGenericErrorContext,
8009 "Reading %s entity content input\n", entity->name);
8010
8011 buf = xmlBufferCreate();
8012 if (buf == NULL) {
8013 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8014 "xmlLoadEntityContent parameter error");
8015 return(-1);
8016 }
8017
8018 input = xmlNewEntityInputStream(ctxt, entity);
8019 if (input == NULL) {
8020 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8021 "xmlLoadEntityContent input error");
8022 xmlBufferFree(buf);
8023 return(-1);
8024 }
8025
8026 /*
8027 * Push the entity as the current input, read char by char
8028 * saving to the buffer until the end of the entity or an error
8029 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008030 if (xmlPushInput(ctxt, input) < 0) {
8031 xmlBufferFree(buf);
8032 return(-1);
8033 }
8034
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008035 GROW;
8036 c = CUR_CHAR(l);
8037 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8038 (IS_CHAR(c))) {
8039 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008040 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008041 count = 0;
8042 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008043 if (ctxt->instate == XML_PARSER_EOF) {
8044 xmlBufferFree(buf);
8045 return(-1);
8046 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008047 }
8048 NEXTL(l);
8049 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008050 if (c == 0) {
8051 count = 0;
8052 GROW;
8053 if (ctxt->instate == XML_PARSER_EOF) {
8054 xmlBufferFree(buf);
8055 return(-1);
8056 }
8057 c = CUR_CHAR(l);
8058 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008059 }
8060
8061 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8062 xmlPopInput(ctxt);
8063 } else if (!IS_CHAR(c)) {
8064 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8065 "xmlLoadEntityContent: invalid char value %d\n",
8066 c);
8067 xmlBufferFree(buf);
8068 return(-1);
8069 }
8070 entity->content = buf->content;
8071 buf->content = NULL;
8072 xmlBufferFree(buf);
8073
8074 return(0);
8075}
8076
8077/**
Owen Taylor3473f882001-02-23 17:55:21 +00008078 * xmlParseStringPEReference:
8079 * @ctxt: an XML parser context
8080 * @str: a pointer to an index in the string
8081 *
8082 * parse PEReference declarations
8083 *
8084 * [69] PEReference ::= '%' Name ';'
8085 *
8086 * [ WFC: No Recursion ]
8087 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008088 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008089 *
8090 * [ WFC: Entity Declared ]
8091 * In a document without any DTD, a document with only an internal DTD
8092 * subset which contains no parameter entity references, or a document
8093 * with "standalone='yes'", ... ... The declaration of a parameter
8094 * entity must precede any reference to it...
8095 *
8096 * [ VC: Entity Declared ]
8097 * In a document with an external subset or external parameter entities
8098 * with "standalone='no'", ... ... The declaration of a parameter entity
8099 * must precede any reference to it...
8100 *
8101 * [ WFC: In DTD ]
8102 * Parameter-entity references may only appear in the DTD.
8103 * NOTE: misleading but this is handled.
8104 *
8105 * Returns the string of the entity content.
8106 * str is updated to the current value of the index
8107 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008108static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008109xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8110 const xmlChar *ptr;
8111 xmlChar cur;
8112 xmlChar *name;
8113 xmlEntityPtr entity = NULL;
8114
8115 if ((str == NULL) || (*str == NULL)) return(NULL);
8116 ptr = *str;
8117 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008118 if (cur != '%')
8119 return(NULL);
8120 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008121 name = xmlParseStringName(ctxt, &ptr);
8122 if (name == NULL) {
8123 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8124 "xmlParseStringPEReference: no name\n");
8125 *str = ptr;
8126 return(NULL);
8127 }
8128 cur = *ptr;
8129 if (cur != ';') {
8130 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8131 xmlFree(name);
8132 *str = ptr;
8133 return(NULL);
8134 }
8135 ptr++;
8136
8137 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008138 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00008139 */
8140 ctxt->nbentities++;
8141
8142 /*
8143 * Request the entity from SAX
8144 */
8145 if ((ctxt->sax != NULL) &&
8146 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008147 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8148 if (ctxt->instate == XML_PARSER_EOF) {
8149 xmlFree(name);
Nick Wellnhoferfb2f5182017-06-10 17:06:16 +02008150 *str = ptr;
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008151 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008152 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008153 if (entity == NULL) {
8154 /*
8155 * [ WFC: Entity Declared ]
8156 * In a document without any DTD, a document with only an
8157 * internal DTD subset which contains no parameter entity
8158 * references, or a document with "standalone='yes'", ...
8159 * ... The declaration of a parameter entity must precede
8160 * any reference to it...
8161 */
8162 if ((ctxt->standalone == 1) ||
8163 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8164 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8165 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008166 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008167 /*
8168 * [ VC: Entity Declared ]
8169 * In a document with an external subset or external
8170 * parameter entities with "standalone='no'", ...
8171 * ... The declaration of a parameter entity must
8172 * precede any reference to it...
8173 */
8174 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8175 "PEReference: %%%s; not found\n",
8176 name, NULL);
8177 ctxt->valid = 0;
8178 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008179 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008180 } else {
8181 /*
8182 * Internal checking in case the entity quest barfed
8183 */
8184 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8185 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8186 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8187 "%%%s; is not a parameter entity\n",
8188 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008189 }
8190 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008191 ctxt->hasPErefs = 1;
8192 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008193 *str = ptr;
8194 return(entity);
8195}
8196
8197/**
8198 * xmlParseDocTypeDecl:
8199 * @ctxt: an XML parser context
8200 *
8201 * parse a DOCTYPE declaration
8202 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008203 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008204 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8205 *
8206 * [ VC: Root Element Type ]
8207 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008208 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008209 */
8210
8211void
8212xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008213 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008214 xmlChar *ExternalID = NULL;
8215 xmlChar *URI = NULL;
8216
8217 /*
8218 * We know that '<!DOCTYPE' has been detected.
8219 */
8220 SKIP(9);
8221
8222 SKIP_BLANKS;
8223
8224 /*
8225 * Parse the DOCTYPE name.
8226 */
8227 name = xmlParseName(ctxt);
8228 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008229 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8230 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008231 }
8232 ctxt->intSubName = name;
8233
8234 SKIP_BLANKS;
8235
8236 /*
8237 * Check for SystemID and ExternalID
8238 */
8239 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8240
8241 if ((URI != NULL) || (ExternalID != NULL)) {
8242 ctxt->hasExternalSubset = 1;
8243 }
8244 ctxt->extSubURI = URI;
8245 ctxt->extSubSystem = ExternalID;
8246
8247 SKIP_BLANKS;
8248
8249 /*
8250 * Create and update the internal subset.
8251 */
8252 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8253 (!ctxt->disableSAX))
8254 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008255 if (ctxt->instate == XML_PARSER_EOF)
8256 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008257
8258 /*
8259 * Is there any internal subset declarations ?
8260 * they are handled separately in xmlParseInternalSubset()
8261 */
8262 if (RAW == '[')
8263 return;
8264
8265 /*
8266 * We should be at the end of the DOCTYPE declaration.
8267 */
8268 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008269 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008270 }
8271 NEXT;
8272}
8273
8274/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008275 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008276 * @ctxt: an XML parser context
8277 *
8278 * parse the internal subset declaration
8279 *
8280 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8281 */
8282
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008283static void
Owen Taylor3473f882001-02-23 17:55:21 +00008284xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8285 /*
8286 * Is there any DTD definition ?
8287 */
8288 if (RAW == '[') {
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008289 int baseInputNr = ctxt->inputNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008290 ctxt->instate = XML_PARSER_DTD;
8291 NEXT;
8292 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008293 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008294 * PEReferences.
8295 * Subsequence (markupdecl | PEReference | S)*
8296 */
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008297 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
Nick Wellnhofer453dff12017-06-19 17:55:20 +02008298 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008299 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008300 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008301
8302 SKIP_BLANKS;
8303 xmlParseMarkupDecl(ctxt);
8304 xmlParsePEReference(ctxt);
8305
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008306 /*
8307 * Conditional sections are allowed from external entities included
8308 * by PE References in the internal subset.
8309 */
8310 if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8311 (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8312 xmlParseConditionalSections(ctxt);
8313 }
8314
Owen Taylor3473f882001-02-23 17:55:21 +00008315 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008316 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008317 "xmlParseInternalSubset: error detected in Markup declaration\n");
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008318 if (ctxt->inputNr > baseInputNr)
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02008319 xmlPopInput(ctxt);
8320 else
8321 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008322 }
8323 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008324 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008325 NEXT;
8326 SKIP_BLANKS;
8327 }
8328 }
8329
8330 /*
8331 * We should be at the end of the DOCTYPE declaration.
8332 */
8333 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008334 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Daniel Veillarda7a94612016-02-09 12:55:29 +01008335 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008336 }
8337 NEXT;
8338}
8339
Daniel Veillard81273902003-09-30 00:43:48 +00008340#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008341/**
8342 * xmlParseAttribute:
8343 * @ctxt: an XML parser context
8344 * @value: a xmlChar ** used to store the value of the attribute
8345 *
8346 * parse an attribute
8347 *
8348 * [41] Attribute ::= Name Eq AttValue
8349 *
8350 * [ WFC: No External Entity References ]
8351 * Attribute values cannot contain direct or indirect entity references
8352 * to external entities.
8353 *
8354 * [ WFC: No < in Attribute Values ]
8355 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008356 * an attribute value (other than "&lt;") must not contain a <.
8357 *
Owen Taylor3473f882001-02-23 17:55:21 +00008358 * [ VC: Attribute Value Type ]
8359 * The attribute must have been declared; the value must be of the type
8360 * declared for it.
8361 *
8362 * [25] Eq ::= S? '=' S?
8363 *
8364 * With namespace:
8365 *
8366 * [NS 11] Attribute ::= QName Eq AttValue
8367 *
8368 * Also the case QName == xmlns:??? is handled independently as a namespace
8369 * definition.
8370 *
8371 * Returns the attribute name, and the value in *value.
8372 */
8373
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008374const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008375xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008376 const xmlChar *name;
8377 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008378
8379 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008380 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008381 name = xmlParseName(ctxt);
8382 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008383 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008384 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008385 return(NULL);
8386 }
8387
8388 /*
8389 * read the value
8390 */
8391 SKIP_BLANKS;
8392 if (RAW == '=') {
8393 NEXT;
8394 SKIP_BLANKS;
8395 val = xmlParseAttValue(ctxt);
8396 ctxt->instate = XML_PARSER_CONTENT;
8397 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008398 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02008399 "Specification mandates value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008400 return(NULL);
8401 }
8402
8403 /*
8404 * Check that xml:lang conforms to the specification
8405 * No more registered as an error, just generate a warning now
8406 * since this was deprecated in XML second edition
8407 */
8408 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8409 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008410 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8411 "Malformed value for xml:lang : %s\n",
8412 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008413 }
8414 }
8415
8416 /*
8417 * Check that xml:space conforms to the specification
8418 */
8419 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8420 if (xmlStrEqual(val, BAD_CAST "default"))
8421 *(ctxt->space) = 0;
8422 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8423 *(ctxt->space) = 1;
8424 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008425 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008426"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008427 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008428 }
8429 }
8430
8431 *value = val;
8432 return(name);
8433}
8434
8435/**
8436 * xmlParseStartTag:
8437 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008438 *
Owen Taylor3473f882001-02-23 17:55:21 +00008439 * parse a start of tag either for rule element or
8440 * EmptyElement. In both case we don't parse the tag closing chars.
8441 *
8442 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8443 *
8444 * [ WFC: Unique Att Spec ]
8445 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008446 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008447 *
8448 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8449 *
8450 * [ WFC: Unique Att Spec ]
8451 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008452 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008453 *
8454 * With namespace:
8455 *
8456 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8457 *
8458 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8459 *
8460 * Returns the element name parsed
8461 */
8462
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008463const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008464xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008465 const xmlChar *name;
8466 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008467 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008468 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008469 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008470 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008471 int i;
8472
8473 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008474 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008475
8476 name = xmlParseName(ctxt);
8477 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008478 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008479 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008480 return(NULL);
8481 }
8482
8483 /*
8484 * Now parse the attributes, it ends up with the ending
8485 *
8486 * (S Attribute)* S?
8487 */
8488 SKIP_BLANKS;
8489 GROW;
8490
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008491 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008492 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008493 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008494 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008495 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008496
8497 attname = xmlParseAttribute(ctxt, &attvalue);
8498 if ((attname != NULL) && (attvalue != NULL)) {
8499 /*
8500 * [ WFC: Unique Att Spec ]
8501 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008502 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008503 */
8504 for (i = 0; i < nbatts;i += 2) {
8505 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008506 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008507 xmlFree(attvalue);
8508 goto failed;
8509 }
8510 }
Owen Taylor3473f882001-02-23 17:55:21 +00008511 /*
8512 * Add the pair to atts
8513 */
8514 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008515 maxatts = 22; /* allow for 10 attrs by default */
8516 atts = (const xmlChar **)
8517 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008518 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008519 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008520 if (attvalue != NULL)
8521 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008522 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008523 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008524 ctxt->atts = atts;
8525 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008526 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008527 const xmlChar **n;
8528
Owen Taylor3473f882001-02-23 17:55:21 +00008529 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008530 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008531 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008532 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008533 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008534 if (attvalue != NULL)
8535 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008536 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008537 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008538 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008539 ctxt->atts = atts;
8540 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008541 }
8542 atts[nbatts++] = attname;
8543 atts[nbatts++] = attvalue;
8544 atts[nbatts] = NULL;
8545 atts[nbatts + 1] = NULL;
8546 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008547 if (attvalue != NULL)
8548 xmlFree(attvalue);
8549 }
8550
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008551failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008552
Daniel Veillard3772de32002-12-17 10:31:45 +00008553 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008554 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8555 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02008556 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008557 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8558 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008559 }
Daniel Veillard02111c12003-02-24 19:14:52 +00008560 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8561 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008562 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8563 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008564 break;
8565 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008566 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008567 GROW;
8568 }
8569
8570 /*
8571 * SAX: Start of Element !
8572 */
8573 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008574 (!ctxt->disableSAX)) {
8575 if (nbatts > 0)
8576 ctxt->sax->startElement(ctxt->userData, name, atts);
8577 else
8578 ctxt->sax->startElement(ctxt->userData, name, NULL);
8579 }
Owen Taylor3473f882001-02-23 17:55:21 +00008580
8581 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008582 /* Free only the content strings */
8583 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008584 if (atts[i] != NULL)
8585 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008586 }
8587 return(name);
8588}
8589
8590/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008591 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008592 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008593 * @line: line of the start tag
8594 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008595 *
8596 * parse an end of tag
8597 *
8598 * [42] ETag ::= '</' Name S? '>'
8599 *
8600 * With namespace
8601 *
8602 * [NS 9] ETag ::= '</' QName S? '>'
8603 */
8604
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008605static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008606xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008607 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008608
8609 GROW;
8610 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008611 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008612 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008613 return;
8614 }
8615 SKIP(2);
8616
Daniel Veillard46de64e2002-05-29 08:21:33 +00008617 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008618
8619 /*
8620 * We should definitely be at the ending "S? '>'" part
8621 */
8622 GROW;
8623 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008624 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008625 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008626 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008627 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008628
8629 /*
8630 * [ WFC: Element Type Match ]
8631 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008632 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008633 *
8634 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008635 if (name != (xmlChar*)1) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008636 if (name == NULL) name = BAD_CAST "unparsable";
Daniel Veillardf403d292003-10-05 13:51:35 +00008637 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008638 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008639 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008640 }
8641
8642 /*
8643 * SAX: End of Tag
8644 */
8645 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8646 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008647 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008648
Daniel Veillarde57ec792003-09-10 10:50:59 +00008649 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008650 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008651 return;
8652}
8653
8654/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008655 * xmlParseEndTag:
8656 * @ctxt: an XML parser context
8657 *
8658 * parse an end of tag
8659 *
8660 * [42] ETag ::= '</' Name S? '>'
8661 *
8662 * With namespace
8663 *
8664 * [NS 9] ETag ::= '</' QName S? '>'
8665 */
8666
8667void
8668xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008669 xmlParseEndTag1(ctxt, 0);
8670}
Daniel Veillard81273902003-09-30 00:43:48 +00008671#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008672
8673/************************************************************************
8674 * *
8675 * SAX 2 specific operations *
8676 * *
8677 ************************************************************************/
8678
Daniel Veillard0fb18932003-09-07 09:14:37 +00008679/*
8680 * xmlGetNamespace:
8681 * @ctxt: an XML parser context
8682 * @prefix: the prefix to lookup
8683 *
8684 * Lookup the namespace name for the @prefix (which ca be NULL)
Jan Pokornýbb654fe2016-04-13 16:56:07 +02008685 * The prefix must come from the @ctxt->dict dictionary
Daniel Veillard0fb18932003-09-07 09:14:37 +00008686 *
8687 * Returns the namespace name or NULL if not bound
8688 */
8689static const xmlChar *
8690xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8691 int i;
8692
Daniel Veillarde57ec792003-09-10 10:50:59 +00008693 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008694 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008695 if (ctxt->nsTab[i] == prefix) {
8696 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8697 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008698 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008699 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008700 return(NULL);
8701}
8702
8703/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008704 * xmlParseQName:
8705 * @ctxt: an XML parser context
8706 * @prefix: pointer to store the prefix part
8707 *
8708 * parse an XML Namespace QName
8709 *
8710 * [6] QName ::= (Prefix ':')? LocalPart
8711 * [7] Prefix ::= NCName
8712 * [8] LocalPart ::= NCName
8713 *
8714 * Returns the Name parsed or NULL
8715 */
8716
8717static const xmlChar *
8718xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8719 const xmlChar *l, *p;
8720
8721 GROW;
8722
8723 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008724 if (l == NULL) {
8725 if (CUR == ':') {
8726 l = xmlParseName(ctxt);
8727 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008728 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008729 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008730 *prefix = NULL;
8731 return(l);
8732 }
8733 }
8734 return(NULL);
8735 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008736 if (CUR == ':') {
8737 NEXT;
8738 p = l;
8739 l = xmlParseNCName(ctxt);
8740 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008741 xmlChar *tmp;
8742
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008743 if (ctxt->instate == XML_PARSER_EOF)
8744 return(NULL);
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008745 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8746 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008747 l = xmlParseNmtoken(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008748 if (l == NULL) {
8749 if (ctxt->instate == XML_PARSER_EOF)
8750 return(NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008751 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008752 } else {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008753 tmp = xmlBuildQName(l, p, NULL, 0);
8754 xmlFree((char *)l);
8755 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008756 p = xmlDictLookup(ctxt->dict, tmp, -1);
8757 if (tmp != NULL) xmlFree(tmp);
8758 *prefix = NULL;
8759 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008760 }
8761 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008762 xmlChar *tmp;
8763
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008764 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8765 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008766 NEXT;
8767 tmp = (xmlChar *) xmlParseName(ctxt);
8768 if (tmp != NULL) {
8769 tmp = xmlBuildQName(tmp, l, NULL, 0);
8770 l = xmlDictLookup(ctxt->dict, tmp, -1);
8771 if (tmp != NULL) xmlFree(tmp);
8772 *prefix = p;
8773 return(l);
8774 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008775 if (ctxt->instate == XML_PARSER_EOF)
8776 return(NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008777 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8778 l = xmlDictLookup(ctxt->dict, tmp, -1);
8779 if (tmp != NULL) xmlFree(tmp);
8780 *prefix = p;
8781 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008782 }
8783 *prefix = p;
8784 } else
8785 *prefix = NULL;
8786 return(l);
8787}
8788
8789/**
8790 * xmlParseQNameAndCompare:
8791 * @ctxt: an XML parser context
8792 * @name: the localname
8793 * @prefix: the prefix, if any.
8794 *
8795 * parse an XML name and compares for match
8796 * (specialized for endtag parsing)
8797 *
8798 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8799 * and the name for mismatch
8800 */
8801
8802static const xmlChar *
8803xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8804 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008805 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008806 const xmlChar *in;
8807 const xmlChar *ret;
8808 const xmlChar *prefix2;
8809
8810 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8811
8812 GROW;
8813 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008814
Daniel Veillard0fb18932003-09-07 09:14:37 +00008815 cmp = prefix;
8816 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008817 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008818 ++cmp;
8819 }
8820 if ((*cmp == 0) && (*in == ':')) {
8821 in++;
8822 cmp = name;
8823 while (*in != 0 && *in == *cmp) {
8824 ++in;
8825 ++cmp;
8826 }
William M. Brack76e95df2003-10-18 16:20:14 +00008827 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008828 /* success */
8829 ctxt->input->cur = in;
8830 return((const xmlChar*) 1);
8831 }
8832 }
8833 /*
8834 * all strings coms from the dictionary, equality can be done directly
8835 */
8836 ret = xmlParseQName (ctxt, &prefix2);
8837 if ((ret == name) && (prefix == prefix2))
8838 return((const xmlChar*) 1);
8839 return ret;
8840}
8841
8842/**
8843 * xmlParseAttValueInternal:
8844 * @ctxt: an XML parser context
8845 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008846 * @alloc: whether the attribute was reallocated as a new string
8847 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008848 *
8849 * parse a value for an attribute.
8850 * NOTE: if no normalization is needed, the routine will return pointers
8851 * directly from the data buffer.
8852 *
8853 * 3.3.3 Attribute-Value Normalization:
8854 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008855 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008856 * - a character reference is processed by appending the referenced
8857 * character to the attribute value
8858 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008859 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008860 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8861 * appending #x20 to the normalized value, except that only a single
8862 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008863 * parsed entity or the literal entity value of an internal parsed entity
8864 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008865 * If the declared value is not CDATA, then the XML processor must further
8866 * process the normalized attribute value by discarding any leading and
8867 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008868 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008869 * All attributes for which no declaration has been read should be treated
8870 * by a non-validating parser as if declared CDATA.
8871 *
8872 * Returns the AttValue parsed or NULL. The value has to be freed by the
8873 * caller if it was copied, this can be detected by val[*len] == 0.
8874 */
8875
/*
 * GROW_PARSE_ATT_VALUE_INTERNAL:
 *
 * Helper for xmlParseAttValueInternal(): run GROW, then re-anchor the
 * @in and @start cursors if the input buffer base changed, and reload
 * @end from the input.
 *
 * This is a bare statement list, not an expression:
 *  - it declares a local named oldbase, so use it at most once per block;
 *  - it executes return(NULL) in the enclosing function when the parser
 *    state is XML_PARSER_EOF after GROW;
 *  - it already ends with ';'.
 * NOTE: the member name in "input->end" is the same token as the @end
 * parameter, so the fourth argument must be a variable literally
 * named end.
 */
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
    const xmlChar *oldbase = (ctxt)->input->base;\
    GROW;\
    if ((ctxt)->instate == XML_PARSER_EOF)\
        return(NULL);\
    if ((ctxt)->input->base != oldbase) {\
        ptrdiff_t delta = (ctxt)->input->base - oldbase;\
        (start) += delta;\
        (in) += delta;\
    }\
    (end) = (ctxt)->input->end;
8887
Daniel Veillard0fb18932003-09-07 09:14:37 +00008888static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008889xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8890 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008891{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008892 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008893 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008894 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08008895 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008896
8897 GROW;
8898 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08008899 line = ctxt->input->line;
8900 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008901 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008902 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008903 return (NULL);
8904 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008905 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008906
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008907 /*
8908 * try to handle in this routine the most common case where no
8909 * allocation of a new string is required and where content is
8910 * pure ASCII.
8911 */
8912 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008913 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008914 end = ctxt->input->end;
8915 start = in;
8916 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008917 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Daniel Veillard0fb18932003-09-07 09:14:37 +00008918 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008919 if (normalize) {
8920 /*
8921 * Skip any leading spaces
8922 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008923 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008924 ((*in == 0x20) || (*in == 0x9) ||
8925 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008926 if (*in == 0xA) {
8927 line++; col = 1;
8928 } else {
8929 col++;
8930 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008931 in++;
8932 start = in;
8933 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008934 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Daniel Veillarde17db992012-07-19 11:25:16 +08008935 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8936 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8937 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008938 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008939 return(NULL);
8940 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008941 }
8942 }
8943 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8944 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008945 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008946 if ((*in++ == 0x20) && (*in == 0x20)) break;
8947 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008948 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Daniel Veillarde17db992012-07-19 11:25:16 +08008949 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8950 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8951 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008952 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008953 return(NULL);
8954 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008955 }
8956 }
8957 last = in;
8958 /*
8959 * skip the trailing blanks
8960 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008961 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008962 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008963 ((*in == 0x20) || (*in == 0x9) ||
8964 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008965 if (*in == 0xA) {
8966 line++, col = 1;
8967 } else {
8968 col++;
8969 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008970 in++;
8971 if (in >= end) {
8972 const xmlChar *oldbase = ctxt->input->base;
8973 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008974 if (ctxt->instate == XML_PARSER_EOF)
8975 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008976 if (oldbase != ctxt->input->base) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07008977 ptrdiff_t delta = ctxt->input->base - oldbase;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008978 start = start + delta;
8979 in = in + delta;
8980 last = last + delta;
8981 }
8982 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008983 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8984 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8985 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008986 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008987 return(NULL);
8988 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008989 }
8990 }
Daniel Veillarde17db992012-07-19 11:25:16 +08008991 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8992 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8993 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008994 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008995 return(NULL);
8996 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008997 if (*in != limit) goto need_complex;
8998 } else {
8999 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9000 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9001 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009002 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009003 if (in >= end) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009004 GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
Daniel Veillarde17db992012-07-19 11:25:16 +08009005 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9006 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9007 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009008 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009009 return(NULL);
9010 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009011 }
9012 }
9013 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009014 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9015 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9016 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009017 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009018 return(NULL);
9019 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009020 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009021 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009022 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009023 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009024 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009025 *len = last - start;
9026 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009027 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009028 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009029 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009030 }
9031 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009032 ctxt->input->line = line;
9033 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009034 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009035 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009036need_complex:
9037 if (alloc) *alloc = 1;
9038 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009039}
9040
9041/**
9042 * xmlParseAttribute2:
9043 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009044 * @pref: the element prefix
9045 * @elem: the element name
9046 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009047 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009048 * @len: an int * to save the length of the attribute
9049 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009050 *
9051 * parse an attribute in the new SAX2 framework.
9052 *
9053 * Returns the attribute name, and the value in *value, .
9054 */
9055
9056static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009057xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009058 const xmlChar * pref, const xmlChar * elem,
9059 const xmlChar ** prefix, xmlChar ** value,
9060 int *len, int *alloc)
9061{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009062 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009063 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009064 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009065
9066 *value = NULL;
9067 GROW;
9068 name = xmlParseQName(ctxt, prefix);
9069 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009070 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9071 "error parsing attribute name\n");
9072 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009073 }
9074
9075 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009076 * get the type if needed
9077 */
9078 if (ctxt->attsSpecial != NULL) {
9079 int type;
9080
Nick Wellnhoferd422b952017-10-09 13:37:42 +02009081 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9082 pref, elem, *prefix, name);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009083 if (type != 0)
9084 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009085 }
9086
9087 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009088 * read the value
9089 */
9090 SKIP_BLANKS;
9091 if (RAW == '=') {
9092 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009093 SKIP_BLANKS;
9094 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9095 if (normalize) {
9096 /*
9097 * Sometimes a second normalisation pass for spaces is needed
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009098 * but that only happens if charrefs or entities references
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009099 * have been used in the attribute value, i.e. the attribute
9100 * value have been extracted in an allocated string already.
9101 */
9102 if (*alloc) {
9103 const xmlChar *val2;
9104
9105 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009106 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009107 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009108 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009109 }
9110 }
9111 }
9112 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009113 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009114 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02009115 "Specification mandates value for attribute %s\n",
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009116 name);
9117 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009118 }
9119
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009120 if (*prefix == ctxt->str_xml) {
9121 /*
9122 * Check that xml:lang conforms to the specification
9123 * No more registered as an error, just generate a warning now
9124 * since this was deprecated in XML second edition
9125 */
9126 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9127 internal_val = xmlStrndup(val, *len);
9128 if (!xmlCheckLanguageID(internal_val)) {
9129 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9130 "Malformed value for xml:lang : %s\n",
9131 internal_val, NULL);
9132 }
9133 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009134
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009135 /*
9136 * Check that xml:space conforms to the specification
9137 */
9138 if (xmlStrEqual(name, BAD_CAST "space")) {
9139 internal_val = xmlStrndup(val, *len);
9140 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9141 *(ctxt->space) = 0;
9142 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9143 *(ctxt->space) = 1;
9144 else {
9145 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9146 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9147 internal_val, NULL);
9148 }
9149 }
9150 if (internal_val) {
9151 xmlFree(internal_val);
9152 }
9153 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009154
9155 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009156 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009157}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009158/**
9159 * xmlParseStartTag2:
9160 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009161 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009162 * parse a start of tag either for rule element or
9163 * EmptyElement. In both case we don't parse the tag closing chars.
9164 * This routine is called when running SAX2 parsing
9165 *
9166 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9167 *
9168 * [ WFC: Unique Att Spec ]
9169 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009170 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009171 *
9172 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9173 *
9174 * [ WFC: Unique Att Spec ]
9175 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009176 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009177 *
9178 * With namespace:
9179 *
9180 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9181 *
9182 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9183 *
9184 * Returns the element name parsed
9185 */
9186
9187static const xmlChar *
9188xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009189 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009190 const xmlChar *localname;
9191 const xmlChar *prefix;
9192 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009193 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009194 const xmlChar *nsname;
9195 xmlChar *attvalue;
9196 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009197 int maxatts = ctxt->maxatts;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009198 int nratts, nbatts, nbdef, inputid;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009199 int i, j, nbNs, attval;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009200 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009201 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009202
9203 if (RAW != '<') return(NULL);
9204 NEXT1;
9205
9206 /*
9207 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9208 * point since the attribute values may be stored as pointers to
9209 * the buffer and calling SHRINK would destroy them !
9210 * The Shrinking is only possible once the full set of attribute
9211 * callbacks have been done.
9212 */
9213 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009214 cur = ctxt->input->cur - ctxt->input->base;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009215 inputid = ctxt->input->id;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009216 nbatts = 0;
9217 nratts = 0;
9218 nbdef = 0;
9219 nbNs = 0;
9220 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009221 /* Forget any namespaces added during an earlier parse of this element. */
9222 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009223
9224 localname = xmlParseQName(ctxt, &prefix);
9225 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009226 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9227 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009228 return(NULL);
9229 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009230 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009231
9232 /*
9233 * Now parse the attributes, it ends up with the ending
9234 *
9235 * (S Attribute)* S?
9236 */
9237 SKIP_BLANKS;
9238 GROW;
9239
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009240 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009241 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009242 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009243 const xmlChar *q = CUR_PTR;
9244 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009245 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009246
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009247 attname = xmlParseAttribute2(ctxt, prefix, localname,
9248 &aprefix, &attvalue, &len, &alloc);
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009249 if ((attname == NULL) || (attvalue == NULL))
9250 goto next_attr;
9251 if (len < 0) len = xmlStrlen(attvalue);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009252
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009253 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9254 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9255 xmlURIPtr uri;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009256
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009257 if (URL == NULL) {
9258 xmlErrMemory(ctxt, "dictionary allocation failure");
9259 if ((attvalue != NULL) && (alloc != 0))
9260 xmlFree(attvalue);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009261 localname = NULL;
9262 goto done;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009263 }
9264 if (*URL != 0) {
9265 uri = xmlParseURI((const char *) URL);
9266 if (uri == NULL) {
9267 xmlNsErr(ctxt, XML_WAR_NS_URI,
9268 "xmlns: '%s' is not a valid URI\n",
9269 URL, NULL, NULL);
9270 } else {
9271 if (uri->scheme == NULL) {
9272 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9273 "xmlns: URI %s is not absolute\n",
9274 URL, NULL, NULL);
9275 }
9276 xmlFreeURI(uri);
9277 }
Daniel Veillard37334572008-07-31 08:20:02 +00009278 if (URL == ctxt->str_xml_ns) {
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009279 if (attname != ctxt->str_xml) {
9280 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9281 "xml namespace URI cannot be the default namespace\n",
9282 NULL, NULL, NULL);
9283 }
9284 goto next_attr;
9285 }
9286 if ((len == 29) &&
9287 (xmlStrEqual(URL,
9288 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9289 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9290 "reuse of the xmlns namespace name is forbidden\n",
9291 NULL, NULL, NULL);
9292 goto next_attr;
9293 }
9294 }
9295 /*
9296 * check that it's not a defined namespace
9297 */
9298 for (j = 1;j <= nbNs;j++)
9299 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9300 break;
9301 if (j <= nbNs)
9302 xmlErrAttributeDup(ctxt, NULL, attname);
9303 else
9304 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009305
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009306 } else if (aprefix == ctxt->str_xmlns) {
9307 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9308 xmlURIPtr uri;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009309
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009310 if (attname == ctxt->str_xml) {
9311 if (URL != ctxt->str_xml_ns) {
9312 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9313 "xml namespace prefix mapped to wrong URI\n",
9314 NULL, NULL, NULL);
9315 }
9316 /*
9317 * Do not keep a namespace definition node
9318 */
9319 goto next_attr;
9320 }
9321 if (URL == ctxt->str_xml_ns) {
9322 if (attname != ctxt->str_xml) {
9323 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9324 "xml namespace URI mapped to wrong prefix\n",
9325 NULL, NULL, NULL);
9326 }
9327 goto next_attr;
9328 }
9329 if (attname == ctxt->str_xmlns) {
9330 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9331 "redefinition of the xmlns prefix is forbidden\n",
9332 NULL, NULL, NULL);
9333 goto next_attr;
9334 }
9335 if ((len == 29) &&
9336 (xmlStrEqual(URL,
9337 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9338 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9339 "reuse of the xmlns namespace name is forbidden\n",
9340 NULL, NULL, NULL);
9341 goto next_attr;
9342 }
9343 if ((URL == NULL) || (URL[0] == 0)) {
9344 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9345 "xmlns:%s: Empty XML namespace is not allowed\n",
9346 attname, NULL, NULL);
9347 goto next_attr;
9348 } else {
9349 uri = xmlParseURI((const char *) URL);
9350 if (uri == NULL) {
9351 xmlNsErr(ctxt, XML_WAR_NS_URI,
9352 "xmlns:%s: '%s' is not a valid URI\n",
9353 attname, URL, NULL);
9354 } else {
9355 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9356 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9357 "xmlns:%s: URI %s is not absolute\n",
9358 attname, URL, NULL);
9359 }
9360 xmlFreeURI(uri);
9361 }
9362 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009363
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009364 /*
9365 * check that it's not a defined namespace
9366 */
9367 for (j = 1;j <= nbNs;j++)
9368 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9369 break;
9370 if (j <= nbNs)
9371 xmlErrAttributeDup(ctxt, aprefix, attname);
9372 else
9373 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9374
9375 } else {
9376 /*
9377 * Add the pair to atts
9378 */
9379 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9380 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9381 goto next_attr;
9382 }
9383 maxatts = ctxt->maxatts;
9384 atts = ctxt->atts;
9385 }
9386 ctxt->attallocs[nratts++] = alloc;
9387 atts[nbatts++] = attname;
9388 atts[nbatts++] = aprefix;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009389 /*
9390 * The namespace URI field is used temporarily to point at the
9391 * base of the current input buffer for non-alloced attributes.
9392 * When the input buffer is reallocated, all the pointers become
9393 * invalid, but they can be reconstructed later.
9394 */
9395 if (alloc)
9396 atts[nbatts++] = NULL;
9397 else
9398 atts[nbatts++] = ctxt->input->base;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009399 atts[nbatts++] = attvalue;
9400 attvalue += len;
9401 atts[nbatts++] = attvalue;
9402 /*
9403 * tag if some deallocation is needed
9404 */
9405 if (alloc != 0) attval = 1;
9406 attvalue = NULL; /* moved into atts */
9407 }
9408
9409next_attr:
9410 if ((attvalue != NULL) && (alloc != 0)) {
9411 xmlFree(attvalue);
9412 attvalue = NULL;
9413 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009414
9415 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009416 if (ctxt->instate == XML_PARSER_EOF)
9417 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009418 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9419 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02009420 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009421 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9422 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009423 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009424 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009425 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9426 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009427 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009428 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009429 break;
9430 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009431 GROW;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009432 }
9433
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009434 if (ctxt->input->id != inputid) {
9435 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9436 "Unexpected change of input\n");
9437 localname = NULL;
9438 goto done;
9439 }
9440
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009441 /* Reconstruct attribute value pointers. */
9442 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9443 if (atts[i+2] != NULL) {
9444 /*
9445 * Arithmetic on dangling pointers is technically undefined
9446 * behavior, but well...
9447 */
9448 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9449 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9450 atts[i+3] += offset; /* value */
9451 atts[i+4] += offset; /* valuend */
9452 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009453 }
9454
Daniel Veillard0fb18932003-09-07 09:14:37 +00009455 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009456 * The attributes defaulting
9457 */
9458 if (ctxt->attsDefault != NULL) {
9459 xmlDefAttrsPtr defaults;
9460
9461 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9462 if (defaults != NULL) {
9463 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009464 attname = defaults->values[5 * i];
9465 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009466
9467 /*
9468 * special work for namespaces defaulted defs
9469 */
9470 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9471 /*
9472 * check that it's not a defined namespace
9473 */
9474 for (j = 1;j <= nbNs;j++)
9475 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9476 break;
9477 if (j <= nbNs) continue;
9478
9479 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009480 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009481 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009482 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009483 nbNs++;
9484 }
9485 } else if (aprefix == ctxt->str_xmlns) {
9486 /*
9487 * check that it's not a defined namespace
9488 */
9489 for (j = 1;j <= nbNs;j++)
9490 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9491 break;
9492 if (j <= nbNs) continue;
9493
9494 nsname = xmlGetNamespace(ctxt, attname);
9495 if (nsname != defaults->values[2]) {
9496 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009497 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009498 nbNs++;
9499 }
9500 } else {
9501 /*
9502 * check that it's not a defined attribute
9503 */
9504 for (j = 0;j < nbatts;j+=5) {
9505 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9506 break;
9507 }
9508 if (j < nbatts) continue;
9509
9510 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9511 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009512 localname = NULL;
9513 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009514 }
9515 maxatts = ctxt->maxatts;
9516 atts = ctxt->atts;
9517 }
9518 atts[nbatts++] = attname;
9519 atts[nbatts++] = aprefix;
9520 if (aprefix == NULL)
9521 atts[nbatts++] = NULL;
9522 else
9523 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009524 atts[nbatts++] = defaults->values[5 * i + 2];
9525 atts[nbatts++] = defaults->values[5 * i + 3];
9526 if ((ctxt->standalone == 1) &&
9527 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009528 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009529 "standalone: attribute %s on %s defaulted from external subset\n",
9530 attname, localname);
9531 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009532 nbdef++;
9533 }
9534 }
9535 }
9536 }
9537
Daniel Veillarde70c8772003-11-25 07:21:18 +00009538 /*
9539 * The attributes checkings
9540 */
9541 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009542 /*
9543 * The default namespace does not apply to attribute names.
9544 */
9545 if (atts[i + 1] != NULL) {
9546 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9547 if (nsname == NULL) {
9548 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9549 "Namespace prefix %s for %s on %s is not defined\n",
9550 atts[i + 1], atts[i], localname);
9551 }
9552 atts[i + 2] = nsname;
9553 } else
9554 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009555 /*
9556 * [ WFC: Unique Att Spec ]
9557 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009558 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009559 * As extended by the Namespace in XML REC.
9560 */
9561 for (j = 0; j < i;j += 5) {
9562 if (atts[i] == atts[j]) {
9563 if (atts[i+1] == atts[j+1]) {
9564 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9565 break;
9566 }
9567 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9568 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9569 "Namespaced Attribute %s in '%s' redefined\n",
9570 atts[i], nsname, NULL);
9571 break;
9572 }
9573 }
9574 }
9575 }
9576
Daniel Veillarde57ec792003-09-10 10:50:59 +00009577 nsname = xmlGetNamespace(ctxt, prefix);
9578 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009579 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9580 "Namespace prefix %s on %s is not defined\n",
9581 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009582 }
9583 *pref = prefix;
9584 *URI = nsname;
9585
9586 /*
9587 * SAX: Start of Element !
9588 */
9589 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9590 (!ctxt->disableSAX)) {
9591 if (nbNs > 0)
9592 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9593 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9594 nbatts / 5, nbdef, atts);
9595 else
9596 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9597 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9598 }
9599
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009600done:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009601 /*
9602 * Free up attribute allocated strings if needed
9603 */
9604 if (attval != 0) {
9605 for (i = 3,j = 0; j < nratts;i += 5,j++)
9606 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9607 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009608 }
9609
9610 return(localname);
9611}
9612
9613/**
9614 * xmlParseEndTag2:
9615 * @ctxt: an XML parser context
9616 * @line: line of the start tag
9617 * @nsNr: number of namespaces on the start tag
9618 *
9619 * parse an end of tag
9620 *
9621 * [42] ETag ::= '</' Name S? '>'
9622 *
9623 * With namespace
9624 *
9625 * [NS 9] ETag ::= '</' QName S? '>'
9626 */
9627
9628static void
9629xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009630 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009631 const xmlChar *name;
David Kilzerdb07dd62016-02-12 09:58:29 -08009632 size_t curLength;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009633
9634 GROW;
9635 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009636 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009637 return;
9638 }
9639 SKIP(2);
9640
David Kilzerdb07dd62016-02-12 09:58:29 -08009641 curLength = ctxt->input->end - ctxt->input->cur;
9642 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9643 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9644 if ((curLength >= (size_t)(tlen + 1)) &&
9645 (ctxt->input->cur[tlen] == '>')) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009646 ctxt->input->cur += tlen + 1;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009647 ctxt->input->col += tlen + 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009648 goto done;
9649 }
9650 ctxt->input->cur += tlen;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009651 ctxt->input->col += tlen;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009652 name = (xmlChar*)1;
9653 } else {
9654 if (prefix == NULL)
9655 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9656 else
9657 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9658 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009659
9660 /*
9661 * We should definitely be at the ending "S? '>'" part
9662 */
9663 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009664 if (ctxt->instate == XML_PARSER_EOF)
9665 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009666 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009667 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009668 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009669 } else
9670 NEXT1;
9671
9672 /*
9673 * [ WFC: Element Type Match ]
9674 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009675 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009676 *
9677 */
9678 if (name != (xmlChar*)1) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009679 if (name == NULL) name = BAD_CAST "unparsable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009680 if ((line == 0) && (ctxt->node != NULL))
9681 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009682 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009683 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009684 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009685 }
9686
9687 /*
9688 * SAX: End of Tag
9689 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009690done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009691 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9692 (!ctxt->disableSAX))
9693 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9694
Daniel Veillard0fb18932003-09-07 09:14:37 +00009695 spacePop(ctxt);
9696 if (nsNr != 0)
9697 nsPop(ctxt, nsNr);
9698 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009699}
9700
9701/**
Owen Taylor3473f882001-02-23 17:55:21 +00009702 * xmlParseCDSect:
9703 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009704 *
Owen Taylor3473f882001-02-23 17:55:21 +00009705 * Parse escaped pure raw content.
9706 *
9707 * [18] CDSect ::= CDStart CData CDEnd
9708 *
9709 * [19] CDStart ::= '<![CDATA['
9710 *
9711 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9712 *
9713 * [21] CDEnd ::= ']]>'
9714 */
9715void
9716xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9717 xmlChar *buf = NULL;
9718 int len = 0;
9719 int size = XML_PARSER_BUFFER_SIZE;
9720 int r, rl;
9721 int s, sl;
9722 int cur, l;
9723 int count = 0;
9724
Daniel Veillard8f597c32003-10-06 08:19:27 +00009725 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009726 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009727 SKIP(9);
9728 } else
9729 return;
9730
9731 ctxt->instate = XML_PARSER_CDATA_SECTION;
9732 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009733 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009734 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009735 ctxt->instate = XML_PARSER_CONTENT;
9736 return;
9737 }
9738 NEXTL(rl);
9739 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009740 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009741 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009742 ctxt->instate = XML_PARSER_CONTENT;
9743 return;
9744 }
9745 NEXTL(sl);
9746 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009747 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009748 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009749 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009750 return;
9751 }
William M. Brack871611b2003-10-18 04:53:14 +00009752 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009753 ((r != ']') || (s != ']') || (cur != '>'))) {
9754 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009755 xmlChar *tmp;
9756
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009757 if ((size > XML_MAX_TEXT_LENGTH) &&
9758 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9759 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9760 "CData section too big found", NULL);
9761 xmlFree (buf);
9762 return;
9763 }
9764 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009765 if (tmp == NULL) {
9766 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009767 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009768 return;
9769 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009770 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009771 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009772 }
9773 COPY_BUF(rl,buf,len,r);
9774 r = s;
9775 rl = sl;
9776 s = cur;
9777 sl = l;
9778 count++;
9779 if (count > 50) {
9780 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009781 if (ctxt->instate == XML_PARSER_EOF) {
9782 xmlFree(buf);
9783 return;
9784 }
Owen Taylor3473f882001-02-23 17:55:21 +00009785 count = 0;
9786 }
9787 NEXTL(l);
9788 cur = CUR_CHAR(l);
9789 }
9790 buf[len] = 0;
9791 ctxt->instate = XML_PARSER_CONTENT;
9792 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009793 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009794 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009795 xmlFree(buf);
9796 return;
9797 }
9798 NEXTL(l);
9799
9800 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009801 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009802 */
9803 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9804 if (ctxt->sax->cdataBlock != NULL)
9805 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009806 else if (ctxt->sax->characters != NULL)
9807 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009808 }
9809 xmlFree(buf);
9810}
9811
9812/**
9813 * xmlParseContent:
9814 * @ctxt: an XML parser context
9815 *
9816 * Parse a content:
9817 *
9818 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9819 */
9820
9821void
9822xmlParseContent(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009823 int nameNr = ctxt->nameNr;
9824
Owen Taylor3473f882001-02-23 17:55:21 +00009825 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009826 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009827 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009828 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009829 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009830 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009831
9832 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009833 * First case : a Processing Instruction.
9834 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009835 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009836 xmlParsePI(ctxt);
9837 }
9838
9839 /*
9840 * Second case : a CDSection
9841 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009842 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009843 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009844 xmlParseCDSect(ctxt);
9845 }
9846
9847 /*
9848 * Third case : a comment
9849 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009850 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009851 (NXT(2) == '-') && (NXT(3) == '-')) {
9852 xmlParseComment(ctxt);
9853 ctxt->instate = XML_PARSER_CONTENT;
9854 }
9855
9856 /*
9857 * Fourth case : a sub-element.
9858 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009859 else if (*cur == '<') {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009860 if (NXT(1) == '/') {
9861 if (ctxt->nameNr <= nameNr)
9862 break;
9863 xmlParseElementEnd(ctxt);
9864 } else {
9865 xmlParseElementStart(ctxt);
9866 }
Owen Taylor3473f882001-02-23 17:55:21 +00009867 }
9868
9869 /*
9870 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009871 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009872 */
9873
Daniel Veillard21a0f912001-02-25 19:54:14 +00009874 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009875 xmlParseReference(ctxt);
9876 }
9877
9878 /*
9879 * Last case, text. Note that References are handled directly.
9880 */
9881 else {
9882 xmlParseCharData(ctxt, 0);
9883 }
9884
9885 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00009886 SHRINK;
9887
Daniel Veillardfdc91562002-07-01 21:52:03 +00009888 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009889 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9890 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +08009891 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009892 break;
9893 }
9894 }
9895}
9896
9897/**
9898 * xmlParseElement:
9899 * @ctxt: an XML parser context
9900 *
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009901 * parse an XML element
Owen Taylor3473f882001-02-23 17:55:21 +00009902 *
9903 * [39] element ::= EmptyElemTag | STag content ETag
9904 *
9905 * [ WFC: Element Type Match ]
9906 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009907 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009908 *
Owen Taylor3473f882001-02-23 17:55:21 +00009909 */
9910
9911void
9912xmlParseElement(xmlParserCtxtPtr ctxt) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009913 if (xmlParseElementStart(ctxt) != 0)
9914 return;
9915 xmlParseContent(ctxt);
9916 if (ctxt->instate == XML_PARSER_EOF)
9917 return;
9918 xmlParseElementEnd(ctxt);
9919}
9920
9921/**
9922 * xmlParseElementStart:
9923 * @ctxt: an XML parser context
9924 *
9925 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9926 * opening tag was parsed, 1 if an empty element was parsed.
9927 */
9928static int
9929xmlParseElementStart(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009930 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009931 const xmlChar *prefix = NULL;
9932 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009933 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009934 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009935 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009936 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009937
Daniel Veillard8915c152008-08-26 13:05:34 +00009938 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9939 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9940 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9941 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9942 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08009943 xmlHaltParser(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009944 return(-1);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009945 }
9946
Owen Taylor3473f882001-02-23 17:55:21 +00009947 /* Capture start position */
9948 if (ctxt->record_info) {
9949 node_info.begin_pos = ctxt->input->consumed +
9950 (CUR_PTR - ctxt->input->base);
9951 node_info.begin_line = ctxt->input->line;
9952 }
9953
9954 if (ctxt->spaceNr == 0)
9955 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009956 else if (*ctxt->space == -2)
9957 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009958 else
9959 spacePush(ctxt, *ctxt->space);
9960
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009961 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009962#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009963 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009964#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009965 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009966#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009967 else
9968 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009969#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009970 if (ctxt->instate == XML_PARSER_EOF)
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009971 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00009972 if (name == NULL) {
9973 spacePop(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009974 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00009975 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -07009976 if (ctxt->sax2)
9977 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
9978#ifdef LIBXML_SAX1_ENABLED
9979 else
9980 namePush(ctxt, name);
9981#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009982 ret = ctxt->node;
9983
Daniel Veillard4432df22003-09-28 18:58:27 +00009984#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009985 /*
9986 * [ VC: Root Element Type ]
9987 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009988 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00009989 */
9990 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9991 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9992 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009993#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009994
9995 /*
9996 * Check for an Empty Element.
9997 */
9998 if ((RAW == '/') && (NXT(1) == '>')) {
9999 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010000 if (ctxt->sax2) {
10001 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10002 (!ctxt->disableSAX))
10003 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010004#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010005 } else {
10006 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10007 (!ctxt->disableSAX))
10008 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010009#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010010 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010011 namePop(ctxt);
10012 spacePop(ctxt);
10013 if (nsNr != ctxt->nsNr)
10014 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010015 if ( ret != NULL && ctxt->record_info ) {
10016 node_info.end_pos = ctxt->input->consumed +
10017 (CUR_PTR - ctxt->input->base);
10018 node_info.end_line = ctxt->input->line;
10019 node_info.node = ret;
10020 xmlParserAddNodeInfo(ctxt, &node_info);
10021 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010022 return(1);
Owen Taylor3473f882001-02-23 17:55:21 +000010023 }
10024 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010025 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010026 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010027 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10028 "Couldn't find end of Start Tag %s line %d\n",
10029 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010030
10031 /*
10032 * end of parsing of this node.
10033 */
10034 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010035 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010036 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010037 if (nsNr != ctxt->nsNr)
10038 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010039
10040 /*
10041 * Capture end position and add node
10042 */
10043 if ( ret != NULL && ctxt->record_info ) {
10044 node_info.end_pos = ctxt->input->consumed +
10045 (CUR_PTR - ctxt->input->base);
10046 node_info.end_line = ctxt->input->line;
10047 node_info.node = ret;
10048 xmlParserAddNodeInfo(ctxt, &node_info);
10049 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010050 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010051 }
10052
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010053 return(0);
10054}
Owen Taylor3473f882001-02-23 17:55:21 +000010055
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010056/**
10057 * xmlParseElementEnd:
10058 * @ctxt: an XML parser context
10059 *
10060 * Parse the end of an XML element.
10061 */
10062static void
10063xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10064 xmlParserNodeInfo node_info;
10065 xmlNodePtr ret = ctxt->node;
10066
10067 if (ctxt->nameNr <= 0)
10068 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010069
10070 /*
10071 * parse the end of tag: '</' should be here.
10072 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010073 if (ctxt->sax2) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010074 const xmlChar *prefix = ctxt->pushTab[ctxt->nameNr * 3 - 3];
10075 const xmlChar *URI = ctxt->pushTab[ctxt->nameNr * 3 - 2];
10076 int nsNr = (ptrdiff_t) ctxt->pushTab[ctxt->nameNr * 3 - 1];
10077 xmlParseEndTag2(ctxt, prefix, URI, 0, nsNr, 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010078 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010079 }
10080#ifdef LIBXML_SAX1_ENABLED
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010081 else
10082 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010083#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010084
10085 /*
10086 * Capture end position and add node
10087 */
10088 if ( ret != NULL && ctxt->record_info ) {
10089 node_info.end_pos = ctxt->input->consumed +
10090 (CUR_PTR - ctxt->input->base);
10091 node_info.end_line = ctxt->input->line;
10092 node_info.node = ret;
10093 xmlParserAddNodeInfo(ctxt, &node_info);
10094 }
10095}
10096
10097/**
10098 * xmlParseVersionNum:
10099 * @ctxt: an XML parser context
10100 *
10101 * parse the XML version value.
10102 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010103 * [26] VersionNum ::= '1.' [0-9]+
10104 *
10105 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010106 *
10107 * Returns the string giving the XML version number, or NULL
10108 */
10109xmlChar *
10110xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10111 xmlChar *buf = NULL;
10112 int len = 0;
10113 int size = 10;
10114 xmlChar cur;
10115
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010116 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010117 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010118 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010119 return(NULL);
10120 }
10121 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010122 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010123 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010124 return(NULL);
10125 }
10126 buf[len++] = cur;
10127 NEXT;
10128 cur=CUR;
10129 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010130 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010131 return(NULL);
10132 }
10133 buf[len++] = cur;
10134 NEXT;
10135 cur=CUR;
10136 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010137 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010138 xmlChar *tmp;
10139
Owen Taylor3473f882001-02-23 17:55:21 +000010140 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010141 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10142 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010143 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010144 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010145 return(NULL);
10146 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010147 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010148 }
10149 buf[len++] = cur;
10150 NEXT;
10151 cur=CUR;
10152 }
10153 buf[len] = 0;
10154 return(buf);
10155}
10156
10157/**
10158 * xmlParseVersionInfo:
10159 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010160 *
Owen Taylor3473f882001-02-23 17:55:21 +000010161 * parse the XML version.
10162 *
10163 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010164 *
Owen Taylor3473f882001-02-23 17:55:21 +000010165 * [25] Eq ::= S? '=' S?
10166 *
10167 * Returns the version string, e.g. "1.0"
10168 */
10169
10170xmlChar *
10171xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10172 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010173
Daniel Veillarda07050d2003-10-19 14:46:32 +000010174 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010175 SKIP(7);
10176 SKIP_BLANKS;
10177 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010178 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010179 return(NULL);
10180 }
10181 NEXT;
10182 SKIP_BLANKS;
10183 if (RAW == '"') {
10184 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010185 version = xmlParseVersionNum(ctxt);
10186 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010187 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010188 } else
10189 NEXT;
10190 } else if (RAW == '\''){
10191 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010192 version = xmlParseVersionNum(ctxt);
10193 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010194 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010195 } else
10196 NEXT;
10197 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010198 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010199 }
10200 }
10201 return(version);
10202}
10203
10204/**
10205 * xmlParseEncName:
10206 * @ctxt: an XML parser context
10207 *
10208 * parse the XML encoding name
10209 *
10210 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10211 *
10212 * Returns the encoding name value or NULL
10213 */
10214xmlChar *
10215xmlParseEncName(xmlParserCtxtPtr ctxt) {
10216 xmlChar *buf = NULL;
10217 int len = 0;
10218 int size = 10;
10219 xmlChar cur;
10220
10221 cur = CUR;
10222 if (((cur >= 'a') && (cur <= 'z')) ||
10223 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010224 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010225 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010226 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010227 return(NULL);
10228 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010229
Owen Taylor3473f882001-02-23 17:55:21 +000010230 buf[len++] = cur;
10231 NEXT;
10232 cur = CUR;
10233 while (((cur >= 'a') && (cur <= 'z')) ||
10234 ((cur >= 'A') && (cur <= 'Z')) ||
10235 ((cur >= '0') && (cur <= '9')) ||
10236 (cur == '.') || (cur == '_') ||
10237 (cur == '-')) {
10238 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010239 xmlChar *tmp;
10240
Owen Taylor3473f882001-02-23 17:55:21 +000010241 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010242 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10243 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010244 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010245 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010246 return(NULL);
10247 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010248 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010249 }
10250 buf[len++] = cur;
10251 NEXT;
10252 cur = CUR;
10253 if (cur == 0) {
10254 SHRINK;
10255 GROW;
10256 cur = CUR;
10257 }
10258 }
10259 buf[len] = 0;
10260 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010261 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010262 }
10263 return(buf);
10264}
10265
10266/**
10267 * xmlParseEncodingDecl:
10268 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010269 *
Owen Taylor3473f882001-02-23 17:55:21 +000010270 * parse the XML encoding declaration
10271 *
10272 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10273 *
10274 * this setups the conversion filters.
10275 *
10276 * Returns the encoding value or NULL
10277 */
10278
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010279const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010280xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10281 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010282
10283 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010284 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010285 SKIP(8);
10286 SKIP_BLANKS;
10287 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010288 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010289 return(NULL);
10290 }
10291 NEXT;
10292 SKIP_BLANKS;
10293 if (RAW == '"') {
10294 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010295 encoding = xmlParseEncName(ctxt);
10296 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010297 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010298 xmlFree((xmlChar *) encoding);
10299 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010300 } else
10301 NEXT;
10302 } else if (RAW == '\''){
10303 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010304 encoding = xmlParseEncName(ctxt);
10305 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010306 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010307 xmlFree((xmlChar *) encoding);
10308 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010309 } else
10310 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010311 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010312 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010313 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010314
10315 /*
10316 * Non standard parsing, allowing the user to ignore encoding
10317 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010318 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10319 xmlFree((xmlChar *) encoding);
10320 return(NULL);
10321 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010322
Daniel Veillard6b621b82003-08-11 15:03:34 +000010323 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -070010324 * UTF-16 encoding switch has already taken place at this stage,
Daniel Veillard6b621b82003-08-11 15:03:34 +000010325 * more over the little-endian/big-endian selection is already done
10326 */
10327 if ((encoding != NULL) &&
10328 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10329 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010330 /*
10331 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010332 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010333 * document is apparently UTF-8 compatible, then raise an
10334 * encoding mismatch fatal error
10335 */
10336 if ((ctxt->encoding == NULL) &&
10337 (ctxt->input->buf != NULL) &&
10338 (ctxt->input->buf->encoder == NULL)) {
10339 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10340 "Document labelled UTF-16 but has UTF-8 content\n");
10341 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010342 if (ctxt->encoding != NULL)
10343 xmlFree((xmlChar *) ctxt->encoding);
10344 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010345 }
10346 /*
10347 * UTF-8 encoding is handled natively
10348 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010349 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010350 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10351 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010352 if (ctxt->encoding != NULL)
10353 xmlFree((xmlChar *) ctxt->encoding);
10354 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010355 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010356 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010357 xmlCharEncodingHandlerPtr handler;
10358
10359 if (ctxt->input->encoding != NULL)
10360 xmlFree((xmlChar *) ctxt->input->encoding);
10361 ctxt->input->encoding = encoding;
10362
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010363 handler = xmlFindCharEncodingHandler((const char *) encoding);
10364 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010365 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10366 /* failed to convert */
10367 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10368 return(NULL);
10369 }
Owen Taylor3473f882001-02-23 17:55:21 +000010370 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010371 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010372 "Unsupported encoding %s\n", encoding);
10373 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010374 }
10375 }
10376 }
10377 return(encoding);
10378}
10379
10380/**
10381 * xmlParseSDDecl:
10382 * @ctxt: an XML parser context
10383 *
10384 * parse the XML standalone declaration
10385 *
10386 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010387 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010388 *
10389 * [ VC: Standalone Document Declaration ]
10390 * TODO The standalone document declaration must have the value "no"
10391 * if any external markup declarations contain declarations of:
10392 * - attributes with default values, if elements to which these
10393 * attributes apply appear in the document without specifications
10394 * of values for these attributes, or
10395 * - entities (other than amp, lt, gt, apos, quot), if references
10396 * to those entities appear in the document, or
10397 * - attributes with values subject to normalization, where the
10398 * attribute appears in the document with a value which will change
10399 * as a result of normalization, or
10400 * - element types with element content, if white space occurs directly
10401 * within any instance of those types.
10402 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010403 * Returns:
10404 * 1 if standalone="yes"
10405 * 0 if standalone="no"
10406 * -2 if standalone attribute is missing or invalid
10407 * (A standalone value of -2 means that the XML declaration was found,
10408 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010409 */
10410
10411int
10412xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010413 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010414
10415 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010416 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010417 SKIP(10);
10418 SKIP_BLANKS;
10419 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010420 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010421 return(standalone);
10422 }
10423 NEXT;
10424 SKIP_BLANKS;
10425 if (RAW == '\''){
10426 NEXT;
10427 if ((RAW == 'n') && (NXT(1) == 'o')) {
10428 standalone = 0;
10429 SKIP(2);
10430 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10431 (NXT(2) == 's')) {
10432 standalone = 1;
10433 SKIP(3);
10434 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010435 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010436 }
10437 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010438 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010439 } else
10440 NEXT;
10441 } else if (RAW == '"'){
10442 NEXT;
10443 if ((RAW == 'n') && (NXT(1) == 'o')) {
10444 standalone = 0;
10445 SKIP(2);
10446 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10447 (NXT(2) == 's')) {
10448 standalone = 1;
10449 SKIP(3);
10450 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010451 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010452 }
10453 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010454 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010455 } else
10456 NEXT;
10457 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010458 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010459 }
10460 }
10461 return(standalone);
10462}
10463
10464/**
10465 * xmlParseXMLDecl:
10466 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010467 *
Owen Taylor3473f882001-02-23 17:55:21 +000010468 * parse an XML declaration header
10469 *
10470 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10471 */
10472
10473void
10474xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10475 xmlChar *version;
10476
10477 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010478 * This value for standalone indicates that the document has an
10479 * XML declaration but it does not have a standalone attribute.
10480 * It will be overwritten later if a standalone attribute is found.
10481 */
10482 ctxt->input->standalone = -2;
10483
10484 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010485 * We know that '<?xml' is here.
10486 */
10487 SKIP(5);
10488
William M. Brack76e95df2003-10-18 16:20:14 +000010489 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010490 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10491 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010492 }
10493 SKIP_BLANKS;
10494
10495 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010496 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010497 */
10498 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010499 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010500 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010501 } else {
10502 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10503 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010504 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010505 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010506 if (ctxt->options & XML_PARSE_OLD10) {
10507 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10508 "Unsupported version '%s'\n",
10509 version);
10510 } else {
10511 if ((version[0] == '1') && ((version[1] == '.'))) {
10512 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10513 "Unsupported version '%s'\n",
10514 version, NULL);
10515 } else {
10516 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10517 "Unsupported version '%s'\n",
10518 version);
10519 }
10520 }
Daniel Veillard19840942001-11-29 16:11:38 +000010521 }
10522 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010523 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010524 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010525 }
Owen Taylor3473f882001-02-23 17:55:21 +000010526
10527 /*
10528 * We may have the encoding declaration
10529 */
William M. Brack76e95df2003-10-18 16:20:14 +000010530 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010531 if ((RAW == '?') && (NXT(1) == '>')) {
10532 SKIP(2);
10533 return;
10534 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010535 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010536 }
10537 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010538 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10539 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010540 /*
10541 * The XML REC instructs us to stop parsing right here
10542 */
10543 return;
10544 }
10545
10546 /*
10547 * We may have the standalone status.
10548 */
William M. Brack76e95df2003-10-18 16:20:14 +000010549 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010550 if ((RAW == '?') && (NXT(1) == '>')) {
10551 SKIP(2);
10552 return;
10553 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010554 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010555 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010556
10557 /*
10558 * We can grow the input buffer freely at that point
10559 */
10560 GROW;
10561
Owen Taylor3473f882001-02-23 17:55:21 +000010562 SKIP_BLANKS;
10563 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10564
10565 SKIP_BLANKS;
10566 if ((RAW == '?') && (NXT(1) == '>')) {
10567 SKIP(2);
10568 } else if (RAW == '>') {
10569 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010570 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010571 NEXT;
10572 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010573 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010574 MOVETO_ENDTAG(CUR_PTR);
10575 NEXT;
10576 }
10577}
10578
10579/**
10580 * xmlParseMisc:
10581 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010582 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010583 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010584 *
10585 * [27] Misc ::= Comment | PI | S
10586 */
10587
10588void
10589xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010590 while ((ctxt->instate != XML_PARSER_EOF) &&
10591 (((RAW == '<') && (NXT(1) == '?')) ||
10592 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10593 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010594 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010595 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010596 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010597 NEXT;
10598 } else
10599 xmlParseComment(ctxt);
10600 }
10601}
10602
10603/**
10604 * xmlParseDocument:
10605 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010606 *
Owen Taylor3473f882001-02-23 17:55:21 +000010607 * parse an XML document (and build a tree if using the standard SAX
10608 * interface).
10609 *
10610 * [1] document ::= prolog element Misc*
10611 *
10612 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10613 *
10614 * Returns 0, -1 in case of error. the parser context is augmented
10615 * as a result of the parsing.
10616 */
10617
10618int
10619xmlParseDocument(xmlParserCtxtPtr ctxt) {
10620 xmlChar start[4];
10621 xmlCharEncoding enc;
10622
10623 xmlInitParser();
10624
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010625 if ((ctxt == NULL) || (ctxt->input == NULL))
10626 return(-1);
10627
Owen Taylor3473f882001-02-23 17:55:21 +000010628 GROW;
10629
10630 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010631 * SAX: detecting the level.
10632 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010633 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010634
10635 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010636 * SAX: beginning of the document processing.
10637 */
10638 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10639 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010640 if (ctxt->instate == XML_PARSER_EOF)
10641 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010642
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010643 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010644 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010645 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010646 * Get the 4 first bytes and decode the charset
10647 * if enc != XML_CHAR_ENCODING_NONE
10648 * plug some encoding conversion routines.
10649 */
10650 start[0] = RAW;
10651 start[1] = NXT(1);
10652 start[2] = NXT(2);
10653 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010654 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010655 if (enc != XML_CHAR_ENCODING_NONE) {
10656 xmlSwitchEncoding(ctxt, enc);
10657 }
Owen Taylor3473f882001-02-23 17:55:21 +000010658 }
10659
10660
10661 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010662 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010663 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010664 }
10665
10666 /*
10667 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010668 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010669 * than just the first line, unless the amount of data is really
10670 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010671 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010672 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10673 GROW;
10674 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010675 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010676
10677 /*
10678 * Note that we will switch encoding on the fly.
10679 */
10680 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010681 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10682 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010683 /*
10684 * The XML REC instructs us to stop parsing right here
10685 */
10686 return(-1);
10687 }
10688 ctxt->standalone = ctxt->input->standalone;
10689 SKIP_BLANKS;
10690 } else {
10691 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10692 }
10693 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10694 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010695 if (ctxt->instate == XML_PARSER_EOF)
10696 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010697 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10698 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10699 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10700 }
Owen Taylor3473f882001-02-23 17:55:21 +000010701
10702 /*
10703 * The Misc part of the Prolog
10704 */
10705 GROW;
10706 xmlParseMisc(ctxt);
10707
10708 /*
10709 * Then possibly doc type declaration(s) and more Misc
10710 * (doctypedecl Misc*)?
10711 */
10712 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010713 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010714
10715 ctxt->inSubset = 1;
10716 xmlParseDocTypeDecl(ctxt);
10717 if (RAW == '[') {
10718 ctxt->instate = XML_PARSER_DTD;
10719 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010720 if (ctxt->instate == XML_PARSER_EOF)
10721 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010722 }
10723
10724 /*
10725 * Create and update the external subset.
10726 */
10727 ctxt->inSubset = 2;
10728 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10729 (!ctxt->disableSAX))
10730 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10731 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010732 if (ctxt->instate == XML_PARSER_EOF)
10733 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010734 ctxt->inSubset = 0;
10735
Daniel Veillardac4118d2008-01-11 05:27:32 +000010736 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010737
10738 ctxt->instate = XML_PARSER_PROLOG;
10739 xmlParseMisc(ctxt);
10740 }
10741
10742 /*
10743 * Time to start parsing the tree itself
10744 */
10745 GROW;
10746 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010747 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10748 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010749 } else {
10750 ctxt->instate = XML_PARSER_CONTENT;
10751 xmlParseElement(ctxt);
10752 ctxt->instate = XML_PARSER_EPILOG;
10753
10754
10755 /*
10756 * The Misc part at the end
10757 */
10758 xmlParseMisc(ctxt);
10759
Daniel Veillard561b7f82002-03-20 21:55:57 +000010760 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010761 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010762 }
10763 ctxt->instate = XML_PARSER_EOF;
10764 }
10765
10766 /*
10767 * SAX: end of the document processing.
10768 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010769 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010770 ctxt->sax->endDocument(ctxt->userData);
10771
Daniel Veillard5997aca2002-03-18 18:36:20 +000010772 /*
10773 * Remove locally kept entity definitions if the tree was not built
10774 */
10775 if ((ctxt->myDoc != NULL) &&
10776 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10777 xmlFreeDoc(ctxt->myDoc);
10778 ctxt->myDoc = NULL;
10779 }
10780
Daniel Veillardae0765b2008-07-31 19:54:59 +000010781 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10782 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10783 if (ctxt->valid)
10784 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10785 if (ctxt->nsWellFormed)
10786 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10787 if (ctxt->options & XML_PARSE_OLD10)
10788 ctxt->myDoc->properties |= XML_DOC_OLD10;
10789 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010790 if (! ctxt->wellFormed) {
10791 ctxt->valid = 0;
10792 return(-1);
10793 }
Owen Taylor3473f882001-02-23 17:55:21 +000010794 return(0);
10795}
10796
10797/**
10798 * xmlParseExtParsedEnt:
10799 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010800 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010801 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010802 * An external general parsed entity is well-formed if it matches the
10803 * production labeled extParsedEnt.
10804 *
10805 * [78] extParsedEnt ::= TextDecl? content
10806 *
10807 * Returns 0, -1 in case of error. the parser context is augmented
10808 * as a result of the parsing.
10809 */
10810
10811int
10812xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10813 xmlChar start[4];
10814 xmlCharEncoding enc;
10815
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010816 if ((ctxt == NULL) || (ctxt->input == NULL))
10817 return(-1);
10818
Owen Taylor3473f882001-02-23 17:55:21 +000010819 xmlDefaultSAXHandlerInit();
10820
Daniel Veillard309f81d2003-09-23 09:02:53 +000010821 xmlDetectSAX2(ctxt);
10822
Owen Taylor3473f882001-02-23 17:55:21 +000010823 GROW;
10824
10825 /*
10826 * SAX: beginning of the document processing.
10827 */
10828 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10829 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10830
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010831 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010832 * Get the 4 first bytes and decode the charset
10833 * if enc != XML_CHAR_ENCODING_NONE
10834 * plug some encoding conversion routines.
10835 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010836 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10837 start[0] = RAW;
10838 start[1] = NXT(1);
10839 start[2] = NXT(2);
10840 start[3] = NXT(3);
10841 enc = xmlDetectCharEncoding(start, 4);
10842 if (enc != XML_CHAR_ENCODING_NONE) {
10843 xmlSwitchEncoding(ctxt, enc);
10844 }
Owen Taylor3473f882001-02-23 17:55:21 +000010845 }
10846
10847
10848 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010849 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010850 }
10851
10852 /*
10853 * Check for the XMLDecl in the Prolog.
10854 */
10855 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010856 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010857
10858 /*
10859 * Note that we will switch encoding on the fly.
10860 */
10861 xmlParseXMLDecl(ctxt);
10862 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10863 /*
10864 * The XML REC instructs us to stop parsing right here
10865 */
10866 return(-1);
10867 }
10868 SKIP_BLANKS;
10869 } else {
10870 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10871 }
10872 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10873 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010874 if (ctxt->instate == XML_PARSER_EOF)
10875 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010876
10877 /*
10878 * Doing validity checking on chunk doesn't make sense
10879 */
10880 ctxt->instate = XML_PARSER_CONTENT;
10881 ctxt->validate = 0;
10882 ctxt->loadsubset = 0;
10883 ctxt->depth = 0;
10884
10885 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010886 if (ctxt->instate == XML_PARSER_EOF)
10887 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010888
Owen Taylor3473f882001-02-23 17:55:21 +000010889 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010890 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010891 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010892 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010893 }
10894
10895 /*
10896 * SAX: end of the document processing.
10897 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010898 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010899 ctxt->sax->endDocument(ctxt->userData);
10900
10901 if (! ctxt->wellFormed) return(-1);
10902 return(0);
10903}
10904
Daniel Veillard73b013f2003-09-30 12:36:01 +000010905#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010906/************************************************************************
10907 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010908 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010909 * *
10910 ************************************************************************/
10911
10912/**
10913 * xmlParseLookupSequence:
10914 * @ctxt: an XML parser context
10915 * @first: the first char to lookup
10916 * @next: the next char to lookup or zero
10917 * @third: the next char to lookup or zero
10918 *
10919 * Try to find if a sequence (first, next, third) or just (first next) or
10920 * (first) is available in the input stream.
10921 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10922 * to avoid rescanning sequences of bytes, it DOES change the state of the
10923 * parser, do not use liberally.
10924 *
10925 * Returns the index to the current parsing point if the full sequence
10926 * is available, -1 otherwise.
10927 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010928static int
Owen Taylor3473f882001-02-23 17:55:21 +000010929xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10930 xmlChar next, xmlChar third) {
10931 int base, len;
10932 xmlParserInputPtr in;
10933 const xmlChar *buf;
10934
10935 in = ctxt->input;
10936 if (in == NULL) return(-1);
10937 base = in->cur - in->base;
10938 if (base < 0) return(-1);
10939 if (ctxt->checkIndex > base)
10940 base = ctxt->checkIndex;
10941 if (in->buf == NULL) {
10942 buf = in->base;
10943 len = in->length;
10944 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010945 buf = xmlBufContent(in->buf->buffer);
10946 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010947 }
10948 /* take into account the sequence length */
10949 if (third) len -= 2;
10950 else if (next) len --;
10951 for (;base < len;base++) {
10952 if (buf[base] == first) {
10953 if (third != 0) {
10954 if ((buf[base + 1] != next) ||
10955 (buf[base + 2] != third)) continue;
10956 } else if (next != 0) {
10957 if (buf[base + 1] != next) continue;
10958 }
10959 ctxt->checkIndex = 0;
10960#ifdef DEBUG_PUSH
10961 if (next == 0)
10962 xmlGenericError(xmlGenericErrorContext,
10963 "PP: lookup '%c' found at %d\n",
10964 first, base);
10965 else if (third == 0)
10966 xmlGenericError(xmlGenericErrorContext,
10967 "PP: lookup '%c%c' found at %d\n",
10968 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010969 else
Owen Taylor3473f882001-02-23 17:55:21 +000010970 xmlGenericError(xmlGenericErrorContext,
10971 "PP: lookup '%c%c%c' found at %d\n",
10972 first, next, third, base);
10973#endif
10974 return(base - (in->cur - in->base));
10975 }
10976 }
10977 ctxt->checkIndex = base;
10978#ifdef DEBUG_PUSH
10979 if (next == 0)
10980 xmlGenericError(xmlGenericErrorContext,
10981 "PP: lookup '%c' failed\n", first);
10982 else if (third == 0)
10983 xmlGenericError(xmlGenericErrorContext,
10984 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010985 else
Owen Taylor3473f882001-02-23 17:55:21 +000010986 xmlGenericError(xmlGenericErrorContext,
10987 "PP: lookup '%c%c%c' failed\n", first, next, third);
10988#endif
10989 return(-1);
10990}
10991
10992/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010993 * xmlParseGetLasts:
10994 * @ctxt: an XML parser context
10995 * @lastlt: pointer to store the last '<' from the input
10996 * @lastgt: pointer to store the last '>' from the input
10997 *
10998 * Lookup the last < and > in the current chunk
10999 */
11000static void
11001xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11002 const xmlChar **lastgt) {
11003 const xmlChar *tmp;
11004
11005 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11006 xmlGenericError(xmlGenericErrorContext,
11007 "Internal error: xmlParseGetLasts\n");
11008 return;
11009 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011010 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011011 tmp = ctxt->input->end;
11012 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000011013 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000011014 if (tmp < ctxt->input->base) {
11015 *lastlt = NULL;
11016 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011017 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011018 *lastlt = tmp;
11019 tmp++;
11020 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11021 if (*tmp == '\'') {
11022 tmp++;
11023 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11024 if (tmp < ctxt->input->end) tmp++;
11025 } else if (*tmp == '"') {
11026 tmp++;
11027 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11028 if (tmp < ctxt->input->end) tmp++;
11029 } else
11030 tmp++;
11031 }
11032 if (tmp < ctxt->input->end)
11033 *lastgt = tmp;
11034 else {
11035 tmp = *lastlt;
11036 tmp--;
11037 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11038 if (tmp >= ctxt->input->base)
11039 *lastgt = tmp;
11040 else
11041 *lastgt = NULL;
11042 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011043 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011044 } else {
11045 *lastlt = NULL;
11046 *lastgt = NULL;
11047 }
11048}
11049/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011050 * xmlCheckCdataPush:
David Kilzer4f8606c2016-01-05 13:38:09 -080011051 * @cur: pointer to the block of characters
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011052 * @len: length of the block in bytes
David Kilzer4f8606c2016-01-05 13:38:09 -080011053 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011054 *
11055 * Check that the block of characters is okay as SCdata content [20]
11056 *
11057 * Returns the number of bytes to pass if okay, a negative index where an
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +020011058 * UTF-8 error occurred otherwise
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011059 */
11060static int
David Kilzer4f8606c2016-01-05 13:38:09 -080011061xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011062 int ix;
11063 unsigned char c;
11064 int codepoint;
11065
11066 if ((utf == NULL) || (len <= 0))
11067 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011068
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011069 for (ix = 0; ix < len;) { /* string is 0-terminated */
11070 c = utf[ix];
11071 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11072 if (c >= 0x20)
11073 ix++;
11074 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11075 ix++;
11076 else
11077 return(-ix);
11078 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011079 if (ix + 2 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011080 if ((utf[ix+1] & 0xc0 ) != 0x80)
11081 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011082 codepoint = (utf[ix] & 0x1f) << 6;
11083 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011084 if (!xmlIsCharQ(codepoint))
11085 return(-ix);
11086 ix += 2;
11087 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011088 if (ix + 3 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011089 if (((utf[ix+1] & 0xc0) != 0x80) ||
11090 ((utf[ix+2] & 0xc0) != 0x80))
11091 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011092 codepoint = (utf[ix] & 0xf) << 12;
11093 codepoint |= (utf[ix+1] & 0x3f) << 6;
11094 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011095 if (!xmlIsCharQ(codepoint))
11096 return(-ix);
11097 ix += 3;
11098 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011099 if (ix + 4 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011100 if (((utf[ix+1] & 0xc0) != 0x80) ||
11101 ((utf[ix+2] & 0xc0) != 0x80) ||
11102 ((utf[ix+3] & 0xc0) != 0x80))
11103 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011104 codepoint = (utf[ix] & 0x7) << 18;
11105 codepoint |= (utf[ix+1] & 0x3f) << 12;
11106 codepoint |= (utf[ix+2] & 0x3f) << 6;
11107 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011108 if (!xmlIsCharQ(codepoint))
11109 return(-ix);
11110 ix += 4;
11111 } else /* unknown encoding */
11112 return(-ix);
11113 }
11114 return(ix);
11115}
11116
11117/**
Owen Taylor3473f882001-02-23 17:55:21 +000011118 * xmlParseTryOrFinish:
11119 * @ctxt: an XML parser context
11120 * @terminate: last chunk indicator
11121 *
11122 * Try to progress on parsing
11123 *
11124 * Returns zero if no parsing was possible
11125 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011126static int
Owen Taylor3473f882001-02-23 17:55:21 +000011127xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11128 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011129 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011130 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011131 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011132
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011133 if (ctxt->input == NULL)
11134 return(0);
11135
Owen Taylor3473f882001-02-23 17:55:21 +000011136#ifdef DEBUG_PUSH
11137 switch (ctxt->instate) {
11138 case XML_PARSER_EOF:
11139 xmlGenericError(xmlGenericErrorContext,
11140 "PP: try EOF\n"); break;
11141 case XML_PARSER_START:
11142 xmlGenericError(xmlGenericErrorContext,
11143 "PP: try START\n"); break;
11144 case XML_PARSER_MISC:
11145 xmlGenericError(xmlGenericErrorContext,
11146 "PP: try MISC\n");break;
11147 case XML_PARSER_COMMENT:
11148 xmlGenericError(xmlGenericErrorContext,
11149 "PP: try COMMENT\n");break;
11150 case XML_PARSER_PROLOG:
11151 xmlGenericError(xmlGenericErrorContext,
11152 "PP: try PROLOG\n");break;
11153 case XML_PARSER_START_TAG:
11154 xmlGenericError(xmlGenericErrorContext,
11155 "PP: try START_TAG\n");break;
11156 case XML_PARSER_CONTENT:
11157 xmlGenericError(xmlGenericErrorContext,
11158 "PP: try CONTENT\n");break;
11159 case XML_PARSER_CDATA_SECTION:
11160 xmlGenericError(xmlGenericErrorContext,
11161 "PP: try CDATA_SECTION\n");break;
11162 case XML_PARSER_END_TAG:
11163 xmlGenericError(xmlGenericErrorContext,
11164 "PP: try END_TAG\n");break;
11165 case XML_PARSER_ENTITY_DECL:
11166 xmlGenericError(xmlGenericErrorContext,
11167 "PP: try ENTITY_DECL\n");break;
11168 case XML_PARSER_ENTITY_VALUE:
11169 xmlGenericError(xmlGenericErrorContext,
11170 "PP: try ENTITY_VALUE\n");break;
11171 case XML_PARSER_ATTRIBUTE_VALUE:
11172 xmlGenericError(xmlGenericErrorContext,
11173 "PP: try ATTRIBUTE_VALUE\n");break;
11174 case XML_PARSER_DTD:
11175 xmlGenericError(xmlGenericErrorContext,
11176 "PP: try DTD\n");break;
11177 case XML_PARSER_EPILOG:
11178 xmlGenericError(xmlGenericErrorContext,
11179 "PP: try EPILOG\n");break;
11180 case XML_PARSER_PI:
11181 xmlGenericError(xmlGenericErrorContext,
11182 "PP: try PI\n");break;
11183 case XML_PARSER_IGNORE:
11184 xmlGenericError(xmlGenericErrorContext,
11185 "PP: try IGNORE\n");break;
11186 }
11187#endif
11188
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011189 if ((ctxt->input != NULL) &&
11190 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011191 xmlSHRINK(ctxt);
11192 ctxt->checkIndex = 0;
11193 }
11194 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011195
Daniel Veillarde50ba812013-04-11 15:54:51 +080011196 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011197 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011198 return(0);
11199
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011200 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011201 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011202 avail = ctxt->input->length -
11203 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011204 else {
11205 /*
11206 * If we are operating on converted input, try to flush
Haibo Huangcfd91dc2020-07-30 23:01:33 -070011207 * remaining chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011208 * buffer. But do not do this in document start where
11209 * encoding="..." may not have been read and we work on a
11210 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011211 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011212 if ((ctxt->instate != XML_PARSER_START) &&
11213 (ctxt->input->buf->raw != NULL) &&
11214 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011215 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11216 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011217 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011218
11219 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011220 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11221 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011222 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011223 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011224 (ctxt->input->cur - ctxt->input->base);
11225 }
Owen Taylor3473f882001-02-23 17:55:21 +000011226 if (avail < 1)
11227 goto done;
11228 switch (ctxt->instate) {
11229 case XML_PARSER_EOF:
11230 /*
11231 * Document parsing is done !
11232 */
11233 goto done;
11234 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011235 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11236 xmlChar start[4];
11237 xmlCharEncoding enc;
11238
11239 /*
11240 * Very first chars read from the document flow.
11241 */
11242 if (avail < 4)
11243 goto done;
11244
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011245 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011246 * Get the 4 first bytes and decode the charset
11247 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011248 * plug some encoding conversion routines,
11249 * else xmlSwitchEncoding will set to (default)
11250 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011251 */
11252 start[0] = RAW;
11253 start[1] = NXT(1);
11254 start[2] = NXT(2);
11255 start[3] = NXT(3);
11256 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011257 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011258 break;
11259 }
Owen Taylor3473f882001-02-23 17:55:21 +000011260
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011261 if (avail < 2)
11262 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011263 cur = ctxt->input->cur[0];
11264 next = ctxt->input->cur[1];
11265 if (cur == 0) {
11266 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11267 ctxt->sax->setDocumentLocator(ctxt->userData,
11268 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011269 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011270 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011271#ifdef DEBUG_PUSH
11272 xmlGenericError(xmlGenericErrorContext,
11273 "PP: entering EOF\n");
11274#endif
11275 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11276 ctxt->sax->endDocument(ctxt->userData);
11277 goto done;
11278 }
11279 if ((cur == '<') && (next == '?')) {
11280 /* PI or XML decl */
11281 if (avail < 5) return(ret);
11282 if ((!terminate) &&
11283 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11284 return(ret);
11285 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11286 ctxt->sax->setDocumentLocator(ctxt->userData,
11287 &xmlDefaultSAXLocator);
11288 if ((ctxt->input->cur[2] == 'x') &&
11289 (ctxt->input->cur[3] == 'm') &&
11290 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011291 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011292 ret += 5;
11293#ifdef DEBUG_PUSH
11294 xmlGenericError(xmlGenericErrorContext,
11295 "PP: Parsing XML Decl\n");
11296#endif
11297 xmlParseXMLDecl(ctxt);
11298 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11299 /*
11300 * The XML REC instructs us to stop parsing right
11301 * here
11302 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011303 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011304 return(0);
11305 }
11306 ctxt->standalone = ctxt->input->standalone;
11307 if ((ctxt->encoding == NULL) &&
11308 (ctxt->input->encoding != NULL))
11309 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11310 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11311 (!ctxt->disableSAX))
11312 ctxt->sax->startDocument(ctxt->userData);
11313 ctxt->instate = XML_PARSER_MISC;
11314#ifdef DEBUG_PUSH
11315 xmlGenericError(xmlGenericErrorContext,
11316 "PP: entering MISC\n");
11317#endif
11318 } else {
11319 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11320 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11321 (!ctxt->disableSAX))
11322 ctxt->sax->startDocument(ctxt->userData);
11323 ctxt->instate = XML_PARSER_MISC;
11324#ifdef DEBUG_PUSH
11325 xmlGenericError(xmlGenericErrorContext,
11326 "PP: entering MISC\n");
11327#endif
11328 }
11329 } else {
11330 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11331 ctxt->sax->setDocumentLocator(ctxt->userData,
11332 &xmlDefaultSAXLocator);
11333 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011334 if (ctxt->version == NULL) {
11335 xmlErrMemory(ctxt, NULL);
11336 break;
11337 }
Owen Taylor3473f882001-02-23 17:55:21 +000011338 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11339 (!ctxt->disableSAX))
11340 ctxt->sax->startDocument(ctxt->userData);
11341 ctxt->instate = XML_PARSER_MISC;
11342#ifdef DEBUG_PUSH
11343 xmlGenericError(xmlGenericErrorContext,
11344 "PP: entering MISC\n");
11345#endif
11346 }
11347 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011348 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011349 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011350 const xmlChar *prefix = NULL;
11351 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011352 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011353
11354 if ((avail < 2) && (ctxt->inputNr == 1))
11355 goto done;
11356 cur = ctxt->input->cur[0];
11357 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011358 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011359 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011360 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11361 ctxt->sax->endDocument(ctxt->userData);
11362 goto done;
11363 }
11364 if (!terminate) {
11365 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011366 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011367 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011368 goto done;
11369 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11370 goto done;
11371 }
11372 }
11373 if (ctxt->spaceNr == 0)
11374 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011375 else if (*ctxt->space == -2)
11376 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011377 else
11378 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011379#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011380 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011381#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011382 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011383#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011384 else
11385 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011386#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011387 if (ctxt->instate == XML_PARSER_EOF)
11388 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011389 if (name == NULL) {
11390 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011391 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011392 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11393 ctxt->sax->endDocument(ctxt->userData);
11394 goto done;
11395 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011396#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011397 /*
11398 * [ VC: Root Element Type ]
11399 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011400 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011401 */
11402 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11403 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11404 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011405#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011406
11407 /*
11408 * Check for an Empty Element.
11409 */
11410 if ((RAW == '/') && (NXT(1) == '>')) {
11411 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011412
11413 if (ctxt->sax2) {
11414 if ((ctxt->sax != NULL) &&
11415 (ctxt->sax->endElementNs != NULL) &&
11416 (!ctxt->disableSAX))
11417 ctxt->sax->endElementNs(ctxt->userData, name,
11418 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011419 if (ctxt->nsNr - nsNr > 0)
11420 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011421#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011422 } else {
11423 if ((ctxt->sax != NULL) &&
11424 (ctxt->sax->endElement != NULL) &&
11425 (!ctxt->disableSAX))
11426 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011427#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011428 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011429 if (ctxt->instate == XML_PARSER_EOF)
11430 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011431 spacePop(ctxt);
11432 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011433 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011434 } else {
11435 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011436 }
Daniel Veillard65686452012-07-19 18:25:01 +080011437 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011438 break;
11439 }
11440 if (RAW == '>') {
11441 NEXT;
11442 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011443 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011444 "Couldn't find end of Start Tag %s\n",
11445 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011446 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011447 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011448 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011449 if (ctxt->sax2)
11450 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011451#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011452 else
11453 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011454#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011455
Daniel Veillarda880b122003-04-21 21:36:41 +000011456 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011457 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011458 break;
11459 }
11460 case XML_PARSER_CONTENT: {
11461 const xmlChar *test;
11462 unsigned int cons;
11463 if ((avail < 2) && (ctxt->inputNr == 1))
11464 goto done;
11465 cur = ctxt->input->cur[0];
11466 next = ctxt->input->cur[1];
11467
11468 test = CUR_PTR;
11469 cons = ctxt->input->consumed;
11470 if ((cur == '<') && (next == '/')) {
11471 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011472 break;
11473 } else if ((cur == '<') && (next == '?')) {
11474 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011475 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11476 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011477 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011478 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011479 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011480 ctxt->instate = XML_PARSER_CONTENT;
11481 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011482 } else if ((cur == '<') && (next != '!')) {
11483 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011484 break;
11485 } else if ((cur == '<') && (next == '!') &&
11486 (ctxt->input->cur[2] == '-') &&
11487 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011488 int term;
11489
11490 if (avail < 4)
11491 goto done;
11492 ctxt->input->cur += 4;
11493 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11494 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011495 if ((!terminate) && (term < 0)) {
11496 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011497 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011498 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011499 xmlParseComment(ctxt);
11500 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011501 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011502 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11503 (ctxt->input->cur[2] == '[') &&
11504 (ctxt->input->cur[3] == 'C') &&
11505 (ctxt->input->cur[4] == 'D') &&
11506 (ctxt->input->cur[5] == 'A') &&
11507 (ctxt->input->cur[6] == 'T') &&
11508 (ctxt->input->cur[7] == 'A') &&
11509 (ctxt->input->cur[8] == '[')) {
11510 SKIP(9);
11511 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011512 break;
11513 } else if ((cur == '<') && (next == '!') &&
11514 (avail < 9)) {
11515 goto done;
11516 } else if (cur == '&') {
11517 if ((!terminate) &&
11518 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11519 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011520 xmlParseReference(ctxt);
11521 } else {
11522 /* TODO Avoid the extra copy, handle directly !!! */
11523 /*
11524 * Goal of the following test is:
11525 * - minimize calls to the SAX 'character' callback
11526 * when they are mergeable
11527 * - handle a problem for isBlank when we only parse
11528 * a sequence of blank chars and the next one is
11529 * not available to check against '<' presence.
11530 * - tries to homogenize the differences in SAX
11531 * callbacks between the push and pull versions
11532 * of the parser.
11533 */
11534 if ((ctxt->inputNr == 1) &&
11535 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11536 if (!terminate) {
11537 if (ctxt->progressive) {
11538 if ((lastlt == NULL) ||
11539 (ctxt->input->cur > lastlt))
11540 goto done;
11541 } else if (xmlParseLookupSequence(ctxt,
11542 '<', 0, 0) < 0) {
11543 goto done;
11544 }
11545 }
11546 }
11547 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011548 xmlParseCharData(ctxt, 0);
11549 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011550 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011551 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11552 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011553 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011554 break;
11555 }
11556 break;
11557 }
11558 case XML_PARSER_END_TAG:
11559 if (avail < 2)
11560 goto done;
11561 if (!terminate) {
11562 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011563 /* > can be found unescaped in attribute values */
11564 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011565 goto done;
11566 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11567 goto done;
11568 }
11569 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011570 if (ctxt->sax2) {
11571 xmlParseEndTag2(ctxt,
Nick Wellnhoferd422b952017-10-09 13:37:42 +020011572 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11573 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11574 (int) (ptrdiff_t)
11575 ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011576 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011577 }
11578#ifdef LIBXML_SAX1_ENABLED
11579 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011580 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011581#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011582 if (ctxt->instate == XML_PARSER_EOF) {
11583 /* Nothing */
11584 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011585 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011586 } else {
11587 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011588 }
11589 break;
11590 case XML_PARSER_CDATA_SECTION: {
11591 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011592 * The Push mode needs to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011593 * cdataBlock merge back contiguous callbacks.
11594 */
11595 int base;
11596
11597 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11598 if (base < 0) {
11599 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011600 int tmp;
11601
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011602 tmp = xmlCheckCdataPush(ctxt->input->cur,
David Kilzer4f8606c2016-01-05 13:38:09 -080011603 XML_PARSER_BIG_BUFFER_SIZE, 0);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011604 if (tmp < 0) {
11605 tmp = -tmp;
11606 ctxt->input->cur += tmp;
11607 goto encoding_error;
11608 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011609 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11610 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011611 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011612 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011613 else if (ctxt->sax->characters != NULL)
11614 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011615 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011616 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011617 if (ctxt->instate == XML_PARSER_EOF)
11618 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011619 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011620 ctxt->checkIndex = 0;
11621 }
11622 goto done;
11623 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011624 int tmp;
11625
David Kilzer4f8606c2016-01-05 13:38:09 -080011626 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011627 if ((tmp < 0) || (tmp != base)) {
11628 tmp = -tmp;
11629 ctxt->input->cur += tmp;
11630 goto encoding_error;
11631 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011632 if ((ctxt->sax != NULL) && (base == 0) &&
11633 (ctxt->sax->cdataBlock != NULL) &&
11634 (!ctxt->disableSAX)) {
11635 /*
11636 * Special case to provide identical behaviour
11637 * between pull and push parsers on empty CDATA
11638 * sections
11639 */
11640 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11641 (!strncmp((const char *)&ctxt->input->cur[-9],
11642 "<![CDATA[", 9)))
11643 ctxt->sax->cdataBlock(ctxt->userData,
11644 BAD_CAST "", 0);
11645 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011646 (!ctxt->disableSAX)) {
11647 if (ctxt->sax->cdataBlock != NULL)
11648 ctxt->sax->cdataBlock(ctxt->userData,
11649 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011650 else if (ctxt->sax->characters != NULL)
11651 ctxt->sax->characters(ctxt->userData,
11652 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011653 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011654 if (ctxt->instate == XML_PARSER_EOF)
11655 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011656 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011657 ctxt->checkIndex = 0;
11658 ctxt->instate = XML_PARSER_CONTENT;
11659#ifdef DEBUG_PUSH
11660 xmlGenericError(xmlGenericErrorContext,
11661 "PP: entering CONTENT\n");
11662#endif
11663 }
11664 break;
11665 }
Owen Taylor3473f882001-02-23 17:55:21 +000011666 case XML_PARSER_MISC:
11667 SKIP_BLANKS;
11668 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011669 avail = ctxt->input->length -
11670 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011671 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011672 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011673 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011674 if (avail < 2)
11675 goto done;
11676 cur = ctxt->input->cur[0];
11677 next = ctxt->input->cur[1];
11678 if ((cur == '<') && (next == '?')) {
11679 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011680 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11681 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011682 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011683 }
Owen Taylor3473f882001-02-23 17:55:21 +000011684#ifdef DEBUG_PUSH
11685 xmlGenericError(xmlGenericErrorContext,
11686 "PP: Parsing PI\n");
11687#endif
11688 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011689 if (ctxt->instate == XML_PARSER_EOF)
11690 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011691 ctxt->instate = XML_PARSER_MISC;
11692 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011693 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011694 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011695 (ctxt->input->cur[2] == '-') &&
11696 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011697 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011698 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11699 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011700 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011701 }
Owen Taylor3473f882001-02-23 17:55:21 +000011702#ifdef DEBUG_PUSH
11703 xmlGenericError(xmlGenericErrorContext,
11704 "PP: Parsing Comment\n");
11705#endif
11706 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011707 if (ctxt->instate == XML_PARSER_EOF)
11708 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011709 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011710 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011711 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011712 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011713 (ctxt->input->cur[2] == 'D') &&
11714 (ctxt->input->cur[3] == 'O') &&
11715 (ctxt->input->cur[4] == 'C') &&
11716 (ctxt->input->cur[5] == 'T') &&
11717 (ctxt->input->cur[6] == 'Y') &&
11718 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011719 (ctxt->input->cur[8] == 'E')) {
11720 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011721 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11722 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011723 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011724 }
Owen Taylor3473f882001-02-23 17:55:21 +000011725#ifdef DEBUG_PUSH
11726 xmlGenericError(xmlGenericErrorContext,
11727 "PP: Parsing internal subset\n");
11728#endif
11729 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011730 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011731 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011732 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011733 if (ctxt->instate == XML_PARSER_EOF)
11734 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011735 if (RAW == '[') {
11736 ctxt->instate = XML_PARSER_DTD;
11737#ifdef DEBUG_PUSH
11738 xmlGenericError(xmlGenericErrorContext,
11739 "PP: entering DTD\n");
11740#endif
11741 } else {
11742 /*
11743 * Create and update the external subset.
11744 */
11745 ctxt->inSubset = 2;
11746 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11747 (ctxt->sax->externalSubset != NULL))
11748 ctxt->sax->externalSubset(ctxt->userData,
11749 ctxt->intSubName, ctxt->extSubSystem,
11750 ctxt->extSubURI);
11751 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011752 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011753 ctxt->instate = XML_PARSER_PROLOG;
11754#ifdef DEBUG_PUSH
11755 xmlGenericError(xmlGenericErrorContext,
11756 "PP: entering PROLOG\n");
11757#endif
11758 }
11759 } else if ((cur == '<') && (next == '!') &&
11760 (avail < 9)) {
11761 goto done;
11762 } else {
11763 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011764 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011765 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011766#ifdef DEBUG_PUSH
11767 xmlGenericError(xmlGenericErrorContext,
11768 "PP: entering START_TAG\n");
11769#endif
11770 }
11771 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011772 case XML_PARSER_PROLOG:
11773 SKIP_BLANKS;
11774 if (ctxt->input->buf == NULL)
11775 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11776 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011777 avail = xmlBufUse(ctxt->input->buf->buffer) -
11778 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011779 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011780 goto done;
11781 cur = ctxt->input->cur[0];
11782 next = ctxt->input->cur[1];
11783 if ((cur == '<') && (next == '?')) {
11784 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011785 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11786 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011787 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011788 }
Owen Taylor3473f882001-02-23 17:55:21 +000011789#ifdef DEBUG_PUSH
11790 xmlGenericError(xmlGenericErrorContext,
11791 "PP: Parsing PI\n");
11792#endif
11793 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011794 if (ctxt->instate == XML_PARSER_EOF)
11795 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011796 ctxt->instate = XML_PARSER_PROLOG;
11797 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011798 } else if ((cur == '<') && (next == '!') &&
11799 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11800 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011801 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11802 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011803 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011804 }
Owen Taylor3473f882001-02-23 17:55:21 +000011805#ifdef DEBUG_PUSH
11806 xmlGenericError(xmlGenericErrorContext,
11807 "PP: Parsing Comment\n");
11808#endif
11809 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011810 if (ctxt->instate == XML_PARSER_EOF)
11811 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011812 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011813 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011814 } else if ((cur == '<') && (next == '!') &&
11815 (avail < 4)) {
11816 goto done;
11817 } else {
11818 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011819 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011820 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011821 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011822#ifdef DEBUG_PUSH
11823 xmlGenericError(xmlGenericErrorContext,
11824 "PP: entering START_TAG\n");
11825#endif
11826 }
11827 break;
11828 case XML_PARSER_EPILOG:
11829 SKIP_BLANKS;
11830 if (ctxt->input->buf == NULL)
11831 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11832 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011833 avail = xmlBufUse(ctxt->input->buf->buffer) -
11834 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011835 if (avail < 2)
11836 goto done;
11837 cur = ctxt->input->cur[0];
11838 next = ctxt->input->cur[1];
11839 if ((cur == '<') && (next == '?')) {
11840 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011841 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11842 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011843 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011844 }
Owen Taylor3473f882001-02-23 17:55:21 +000011845#ifdef DEBUG_PUSH
11846 xmlGenericError(xmlGenericErrorContext,
11847 "PP: Parsing PI\n");
11848#endif
11849 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011850 if (ctxt->instate == XML_PARSER_EOF)
11851 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011852 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011853 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011854 } else if ((cur == '<') && (next == '!') &&
11855 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11856 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011857 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11858 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011859 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011860 }
Owen Taylor3473f882001-02-23 17:55:21 +000011861#ifdef DEBUG_PUSH
11862 xmlGenericError(xmlGenericErrorContext,
11863 "PP: Parsing Comment\n");
11864#endif
11865 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011866 if (ctxt->instate == XML_PARSER_EOF)
11867 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011868 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011869 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011870 } else if ((cur == '<') && (next == '!') &&
11871 (avail < 4)) {
11872 goto done;
11873 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011874 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011875 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011876#ifdef DEBUG_PUSH
11877 xmlGenericError(xmlGenericErrorContext,
11878 "PP: entering EOF\n");
11879#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011880 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011881 ctxt->sax->endDocument(ctxt->userData);
11882 goto done;
11883 }
11884 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011885 case XML_PARSER_DTD: {
11886 /*
11887 * Sorry but progressive parsing of the internal subset
11888 * is not expected to be supported. We first check that
11889 * the full content of the internal subset is available and
11890 * the parsing is launched only at that point.
11891 * Internal subset ends up with "']' S? '>'" in an unescaped
11892 * section and not in a ']]>' sequence which are conditional
11893 * sections (whoever argued to keep that crap in XML deserves
11894 * a place in hell !).
11895 */
11896 int base, i;
11897 xmlChar *buf;
11898 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011899 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011900
11901 base = ctxt->input->cur - ctxt->input->base;
11902 if (base < 0) return(0);
11903 if (ctxt->checkIndex > base)
11904 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011905 buf = xmlBufContent(ctxt->input->buf->buffer);
11906 use = xmlBufUse(ctxt->input->buf->buffer);
11907 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011908 if (quote != 0) {
11909 if (buf[base] == quote)
11910 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011911 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011912 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011913 if ((quote == 0) && (buf[base] == '<')) {
11914 int found = 0;
11915 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011916 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011917 (buf[base + 1] == '!') &&
11918 (buf[base + 2] == '-') &&
11919 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011920 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011921 if ((buf[base] == '-') &&
11922 (buf[base + 1] == '-') &&
11923 (buf[base + 2] == '>')) {
11924 found = 1;
11925 base += 2;
11926 break;
11927 }
11928 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011929 if (!found) {
11930#if 0
11931 fprintf(stderr, "unfinished comment\n");
11932#endif
11933 break; /* for */
11934 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011935 continue;
11936 }
11937 }
Owen Taylor3473f882001-02-23 17:55:21 +000011938 if (buf[base] == '"') {
11939 quote = '"';
11940 continue;
11941 }
11942 if (buf[base] == '\'') {
11943 quote = '\'';
11944 continue;
11945 }
11946 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011947#if 0
11948 fprintf(stderr, "%c%c%c%c: ", buf[base],
11949 buf[base + 1], buf[base + 2], buf[base + 3]);
11950#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011951 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011952 break;
11953 if (buf[base + 1] == ']') {
11954 /* conditional crap, skip both ']' ! */
11955 base++;
11956 continue;
11957 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011958 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011959 if (buf[base + i] == '>') {
11960#if 0
11961 fprintf(stderr, "found\n");
11962#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011963 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011964 }
11965 if (!IS_BLANK_CH(buf[base + i])) {
11966#if 0
11967 fprintf(stderr, "not found\n");
11968#endif
11969 goto not_end_of_int_subset;
11970 }
Owen Taylor3473f882001-02-23 17:55:21 +000011971 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011972#if 0
11973 fprintf(stderr, "end of stream\n");
11974#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011975 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011976
Owen Taylor3473f882001-02-23 17:55:21 +000011977 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011978not_end_of_int_subset:
11979 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011980 }
11981 /*
11982 * We didn't found the end of the Internal subset
11983 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011984 if (quote == 0)
11985 ctxt->checkIndex = base;
11986 else
11987 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011988#ifdef DEBUG_PUSH
11989 if (next == 0)
11990 xmlGenericError(xmlGenericErrorContext,
11991 "PP: lookup of int subset end filed\n");
11992#endif
11993 goto done;
11994
11995found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011996 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011997 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011998 if (ctxt->instate == XML_PARSER_EOF)
11999 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012000 ctxt->inSubset = 2;
12001 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12002 (ctxt->sax->externalSubset != NULL))
12003 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12004 ctxt->extSubSystem, ctxt->extSubURI);
12005 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000012006 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012007 if (ctxt->instate == XML_PARSER_EOF)
12008 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012009 ctxt->instate = XML_PARSER_PROLOG;
12010 ctxt->checkIndex = 0;
12011#ifdef DEBUG_PUSH
12012 xmlGenericError(xmlGenericErrorContext,
12013 "PP: entering PROLOG\n");
12014#endif
12015 break;
12016 }
12017 case XML_PARSER_COMMENT:
12018 xmlGenericError(xmlGenericErrorContext,
12019 "PP: internal error, state == COMMENT\n");
12020 ctxt->instate = XML_PARSER_CONTENT;
12021#ifdef DEBUG_PUSH
12022 xmlGenericError(xmlGenericErrorContext,
12023 "PP: entering CONTENT\n");
12024#endif
12025 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012026 case XML_PARSER_IGNORE:
12027 xmlGenericError(xmlGenericErrorContext,
12028 "PP: internal error, state == IGNORE");
12029 ctxt->instate = XML_PARSER_DTD;
12030#ifdef DEBUG_PUSH
12031 xmlGenericError(xmlGenericErrorContext,
12032 "PP: entering DTD\n");
12033#endif
12034 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012035 case XML_PARSER_PI:
12036 xmlGenericError(xmlGenericErrorContext,
12037 "PP: internal error, state == PI\n");
12038 ctxt->instate = XML_PARSER_CONTENT;
12039#ifdef DEBUG_PUSH
12040 xmlGenericError(xmlGenericErrorContext,
12041 "PP: entering CONTENT\n");
12042#endif
12043 break;
12044 case XML_PARSER_ENTITY_DECL:
12045 xmlGenericError(xmlGenericErrorContext,
12046 "PP: internal error, state == ENTITY_DECL\n");
12047 ctxt->instate = XML_PARSER_DTD;
12048#ifdef DEBUG_PUSH
12049 xmlGenericError(xmlGenericErrorContext,
12050 "PP: entering DTD\n");
12051#endif
12052 break;
12053 case XML_PARSER_ENTITY_VALUE:
12054 xmlGenericError(xmlGenericErrorContext,
12055 "PP: internal error, state == ENTITY_VALUE\n");
12056 ctxt->instate = XML_PARSER_CONTENT;
12057#ifdef DEBUG_PUSH
12058 xmlGenericError(xmlGenericErrorContext,
12059 "PP: entering DTD\n");
12060#endif
12061 break;
12062 case XML_PARSER_ATTRIBUTE_VALUE:
12063 xmlGenericError(xmlGenericErrorContext,
12064 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12065 ctxt->instate = XML_PARSER_START_TAG;
12066#ifdef DEBUG_PUSH
12067 xmlGenericError(xmlGenericErrorContext,
12068 "PP: entering START_TAG\n");
12069#endif
12070 break;
12071 case XML_PARSER_SYSTEM_LITERAL:
12072 xmlGenericError(xmlGenericErrorContext,
12073 "PP: internal error, state == SYSTEM_LITERAL\n");
12074 ctxt->instate = XML_PARSER_START_TAG;
12075#ifdef DEBUG_PUSH
12076 xmlGenericError(xmlGenericErrorContext,
12077 "PP: entering START_TAG\n");
12078#endif
12079 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012080 case XML_PARSER_PUBLIC_LITERAL:
12081 xmlGenericError(xmlGenericErrorContext,
12082 "PP: internal error, state == PUBLIC_LITERAL\n");
12083 ctxt->instate = XML_PARSER_START_TAG;
12084#ifdef DEBUG_PUSH
12085 xmlGenericError(xmlGenericErrorContext,
12086 "PP: entering START_TAG\n");
12087#endif
12088 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012089 }
12090 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012091done:
Owen Taylor3473f882001-02-23 17:55:21 +000012092#ifdef DEBUG_PUSH
12093 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12094#endif
12095 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012096encoding_error:
12097 {
12098 char buffer[150];
12099
12100 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12101 ctxt->input->cur[0], ctxt->input->cur[1],
12102 ctxt->input->cur[2], ctxt->input->cur[3]);
12103 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12104 "Input is not proper UTF-8, indicate encoding !\n%s",
12105 BAD_CAST buffer, NULL);
12106 }
12107 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012108}
12109
12110/**
Daniel Veillard65686452012-07-19 18:25:01 +080012111 * xmlParseCheckTransition:
12112 * @ctxt: an XML parser context
12113 * @chunk: a char array
12114 * @size: the size in byte of the chunk
12115 *
12116 * Check depending on the current parser state if the chunk given must be
12117 * processed immediately or one need more data to advance on parsing.
12118 *
12119 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12120 */
12121static int
12122xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12123 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12124 return(-1);
12125 if (ctxt->instate == XML_PARSER_START_TAG) {
12126 if (memchr(chunk, '>', size) != NULL)
12127 return(1);
12128 return(0);
12129 }
12130 if (ctxt->progressive == XML_PARSER_COMMENT) {
12131 if (memchr(chunk, '>', size) != NULL)
12132 return(1);
12133 return(0);
12134 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012135 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12136 if (memchr(chunk, '>', size) != NULL)
12137 return(1);
12138 return(0);
12139 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012140 if (ctxt->progressive == XML_PARSER_PI) {
12141 if (memchr(chunk, '>', size) != NULL)
12142 return(1);
12143 return(0);
12144 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012145 if (ctxt->instate == XML_PARSER_END_TAG) {
12146 if (memchr(chunk, '>', size) != NULL)
12147 return(1);
12148 return(0);
12149 }
12150 if ((ctxt->progressive == XML_PARSER_DTD) ||
12151 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012152 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012153 return(1);
12154 return(0);
12155 }
Daniel Veillard65686452012-07-19 18:25:01 +080012156 return(1);
12157}
12158
12159/**
Owen Taylor3473f882001-02-23 17:55:21 +000012160 * xmlParseChunk:
12161 * @ctxt: an XML parser context
12162 * @chunk: an char array
12163 * @size: the size in byte of the chunk
12164 * @terminate: last chunk indicator
12165 *
12166 * Parse a Chunk of memory
12167 *
12168 * Returns zero if no error, the xmlParserErrors otherwise.
12169 */
12170int
12171xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12172 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012173 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012174 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012175 size_t old_avail = 0;
12176 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012177
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012178 if (ctxt == NULL)
12179 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012180 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012181 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012182 if (ctxt->instate == XML_PARSER_EOF)
12183 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012184 if (ctxt->instate == XML_PARSER_START)
12185 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012186 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12187 (chunk[size - 1] == '\r')) {
12188 end_in_lf = 1;
12189 size--;
12190 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012191
12192xmldecl_done:
12193
Owen Taylor3473f882001-02-23 17:55:21 +000012194 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12195 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012196 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12197 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012198 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012199
Daniel Veillard65686452012-07-19 18:25:01 +080012200 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012201 /*
12202 * Specific handling if we autodetected an encoding, we should not
12203 * push more than the first line ... which depend on the encoding
12204 * And only push the rest once the final encoding was detected
12205 */
12206 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12207 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012208 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012209
12210 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12211 BAD_CAST "UTF-16")) ||
12212 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12213 BAD_CAST "UTF16")))
12214 len = 90;
12215 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12216 BAD_CAST "UCS-4")) ||
12217 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12218 BAD_CAST "UCS4")))
12219 len = 180;
12220
12221 if (ctxt->input->buf->rawconsumed < len)
12222 len -= ctxt->input->buf->rawconsumed;
12223
Raul Hudeaba9716a2010-03-15 10:13:29 +010012224 /*
12225 * Change size for reading the initial declaration only
12226 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12227 * will blindly copy extra bytes from memory.
12228 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012229 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012230 remain = size - len;
12231 size = len;
12232 } else {
12233 remain = 0;
12234 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012235 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012236 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012237 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
William M. Bracka3215c72004-07-31 16:24:01 +000012238 if (res < 0) {
12239 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012240 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012241 return (XML_PARSER_EOF);
12242 }
Owen Taylor3473f882001-02-23 17:55:21 +000012243#ifdef DEBUG_PUSH
12244 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12245#endif
12246
Owen Taylor3473f882001-02-23 17:55:21 +000012247 } else if (ctxt->instate != XML_PARSER_EOF) {
12248 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12249 xmlParserInputBufferPtr in = ctxt->input->buf;
12250 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12251 (in->raw != NULL)) {
12252 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012253 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12254 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012255
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012256 nbchars = xmlCharEncInput(in, terminate);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012257 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012258 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012259 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012260 xmlGenericError(xmlGenericErrorContext,
12261 "xmlParseChunk: encoder error\n");
Nick Wellnhoferab362ab2018-01-22 15:40:05 +010012262 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012263 return(XML_ERR_INVALID_ENCODING);
12264 }
12265 }
12266 }
12267 }
Daniel Veillard65686452012-07-19 18:25:01 +080012268 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012269 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012270 } else {
12271 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12272 avail = xmlBufUse(ctxt->input->buf->buffer);
12273 /*
12274 * Depending on the current state it may not be such
12275 * a good idea to try parsing if there is nothing in the chunk
12276 * which would be worth doing a parser state transition and we
12277 * need to wait for more data
12278 */
12279 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12280 (old_avail == 0) || (avail == 0) ||
12281 (xmlParseCheckTransition(ctxt,
12282 (const char *)&ctxt->input->base[old_avail],
12283 avail - old_avail)))
12284 xmlParseTryOrFinish(ctxt, terminate);
12285 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012286 if (ctxt->instate == XML_PARSER_EOF)
12287 return(ctxt->errNo);
12288
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012289 if ((ctxt->input != NULL) &&
12290 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12291 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12292 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12293 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012294 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012295 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012296 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12297 return(ctxt->errNo);
12298
12299 if (remain != 0) {
12300 chunk += size;
12301 size = remain;
12302 remain = 0;
12303 goto xmldecl_done;
12304 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012305 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12306 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012307 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12308 ctxt->input);
12309 size_t current = ctxt->input->cur - ctxt->input->base;
12310
Daniel Veillarda617e242006-01-09 14:38:44 +000012311 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012312
12313 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12314 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012315 }
Owen Taylor3473f882001-02-23 17:55:21 +000012316 if (terminate) {
12317 /*
12318 * Check for termination
12319 */
Daniel Veillard65686452012-07-19 18:25:01 +080012320 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012321
12322 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012323 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012324 cur_avail = ctxt->input->length -
12325 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012326 else
Daniel Veillard65686452012-07-19 18:25:01 +080012327 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12328 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012329 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012330
Owen Taylor3473f882001-02-23 17:55:21 +000012331 if ((ctxt->instate != XML_PARSER_EOF) &&
12332 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012333 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012334 }
Daniel Veillard65686452012-07-19 18:25:01 +080012335 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012336 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012337 }
Owen Taylor3473f882001-02-23 17:55:21 +000012338 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012339 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012340 ctxt->sax->endDocument(ctxt->userData);
12341 }
12342 ctxt->instate = XML_PARSER_EOF;
12343 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012344 if (ctxt->wellFormed == 0)
12345 return((xmlParserErrors) ctxt->errNo);
12346 else
12347 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012348}
12349
/************************************************************************
 *									*
 *		I/O front end functions to the parser			*
 *									*
 ************************************************************************/
12355
12356/**
Owen Taylor3473f882001-02-23 17:55:21 +000012357 * xmlCreatePushParserCtxt:
12358 * @sax: a SAX handler
12359 * @user_data: The user data returned on SAX callbacks
12360 * @chunk: a pointer to an array of chars
12361 * @size: number of chars in the array
12362 * @filename: an optional file name or URI
12363 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012364 * Create a parser context for using the XML parser in push mode.
12365 * If @buffer and @size are non-NULL, the data is used to detect
12366 * the encoding. The remaining characters will be parsed so they
12367 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012368 * To allow content encoding detection, @size should be >= 4
12369 * The value of @filename is used for fetching external entities
12370 * and error/warning reports.
12371 *
12372 * Returns the new parser context or NULL
12373 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012374
Owen Taylor3473f882001-02-23 17:55:21 +000012375xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012376xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012377 const char *chunk, int size, const char *filename) {
12378 xmlParserCtxtPtr ctxt;
12379 xmlParserInputPtr inputStream;
12380 xmlParserInputBufferPtr buf;
12381 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12382
12383 /*
12384 * plug some encoding conversion routines
12385 */
12386 if ((chunk != NULL) && (size >= 4))
12387 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12388
12389 buf = xmlAllocParserInputBuffer(enc);
12390 if (buf == NULL) return(NULL);
12391
12392 ctxt = xmlNewParserCtxt();
12393 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012394 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012395 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012396 return(NULL);
12397 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012398 ctxt->dictNames = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012399 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012400#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012401 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012402#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012403 xmlFree(ctxt->sax);
12404 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12405 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012406 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012407 xmlFreeParserInputBuffer(buf);
12408 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012409 return(NULL);
12410 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012411 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12412 if (sax->initialized == XML_SAX2_MAGIC)
12413 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12414 else
12415 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012416 if (user_data != NULL)
12417 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012418 }
Owen Taylor3473f882001-02-23 17:55:21 +000012419 if (filename == NULL) {
12420 ctxt->directory = NULL;
12421 } else {
12422 ctxt->directory = xmlParserGetDirectory(filename);
12423 }
12424
12425 inputStream = xmlNewInputStream(ctxt);
12426 if (inputStream == NULL) {
12427 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012428 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012429 return(NULL);
12430 }
12431
12432 if (filename == NULL)
12433 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012434 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012435 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012436 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012437 if (inputStream->filename == NULL) {
12438 xmlFreeParserCtxt(ctxt);
12439 xmlFreeParserInputBuffer(buf);
12440 return(NULL);
12441 }
12442 }
Owen Taylor3473f882001-02-23 17:55:21 +000012443 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012444 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012445 inputPush(ctxt, inputStream);
12446
William M. Brack3a1cd212005-02-11 14:35:54 +000012447 /*
12448 * If the caller didn't provide an initial 'chunk' for determining
12449 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12450 * that it can be automatically determined later
12451 */
12452 if ((size == 0) || (chunk == NULL)) {
12453 ctxt->charset = XML_CHAR_ENCODING_NONE;
12454 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012455 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12456 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012457
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012458 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012459
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012460 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012461#ifdef DEBUG_PUSH
12462 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12463#endif
12464 }
12465
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012466 if (enc != XML_CHAR_ENCODING_NONE) {
12467 xmlSwitchEncoding(ctxt, enc);
12468 }
12469
Owen Taylor3473f882001-02-23 17:55:21 +000012470 return(ctxt);
12471}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012472#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012473
12474/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012475 * xmlHaltParser:
12476 * @ctxt: an XML parser context
12477 *
12478 * Blocks further parser processing don't override error
12479 * for internal use
12480 */
12481static void
12482xmlHaltParser(xmlParserCtxtPtr ctxt) {
12483 if (ctxt == NULL)
12484 return;
12485 ctxt->instate = XML_PARSER_EOF;
12486 ctxt->disableSAX = 1;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012487 while (ctxt->inputNr > 1)
12488 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012489 if (ctxt->input != NULL) {
12490 /*
12491 * in case there was a specific allocation deallocate before
12492 * overriding base
12493 */
12494 if (ctxt->input->free != NULL) {
12495 ctxt->input->free((xmlChar *) ctxt->input->base);
12496 ctxt->input->free = NULL;
12497 }
Elliott Hughes7fbecab2019-01-10 16:42:03 -080012498 if (ctxt->input->buf != NULL) {
12499 xmlFreeParserInputBuffer(ctxt->input->buf);
12500 ctxt->input->buf = NULL;
12501 }
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012502 ctxt->input->cur = BAD_CAST"";
Elliott Hughes7fbecab2019-01-10 16:42:03 -080012503 ctxt->input->length = 0;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012504 ctxt->input->base = ctxt->input->cur;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012505 ctxt->input->end = ctxt->input->cur;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012506 }
12507}
12508
12509/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012510 * xmlStopParser:
12511 * @ctxt: an XML parser context
12512 *
12513 * Blocks further parser processing
12514 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012515void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012516xmlStopParser(xmlParserCtxtPtr ctxt) {
12517 if (ctxt == NULL)
12518 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012519 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012520 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012521}
12522
12523/**
Owen Taylor3473f882001-02-23 17:55:21 +000012524 * xmlCreateIOParserCtxt:
12525 * @sax: a SAX handler
12526 * @user_data: The user data returned on SAX callbacks
12527 * @ioread: an I/O read function
12528 * @ioclose: an I/O close function
12529 * @ioctx: an I/O handler
12530 * @enc: the charset encoding if known
12531 *
12532 * Create a parser context for using the XML parser with an existing
12533 * I/O stream
12534 *
12535 * Returns the new parser context or NULL
12536 */
12537xmlParserCtxtPtr
12538xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12539 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12540 void *ioctx, xmlCharEncoding enc) {
12541 xmlParserCtxtPtr ctxt;
12542 xmlParserInputPtr inputStream;
12543 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012544
Daniel Veillard42595322004-11-08 10:52:06 +000012545 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012546
12547 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012548 if (buf == NULL) {
12549 if (ioclose != NULL)
12550 ioclose(ioctx);
12551 return (NULL);
12552 }
Owen Taylor3473f882001-02-23 17:55:21 +000012553
12554 ctxt = xmlNewParserCtxt();
12555 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012556 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012557 return(NULL);
12558 }
12559 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012560#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012561 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012562#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012563 xmlFree(ctxt->sax);
12564 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12565 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012566 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012567 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012568 return(NULL);
12569 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012570 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12571 if (sax->initialized == XML_SAX2_MAGIC)
12572 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12573 else
12574 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012575 if (user_data != NULL)
12576 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012577 }
Owen Taylor3473f882001-02-23 17:55:21 +000012578
12579 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12580 if (inputStream == NULL) {
12581 xmlFreeParserCtxt(ctxt);
12582 return(NULL);
12583 }
12584 inputPush(ctxt, inputStream);
12585
12586 return(ctxt);
12587}
12588
Daniel Veillard4432df22003-09-28 18:58:27 +000012589#ifdef LIBXML_VALID_ENABLED
/************************************************************************
 *									*
 *		Front ends when parsing a DTD				*
 *									*
 ************************************************************************/
12595
12596/**
12597 * xmlIOParseDTD:
12598 * @sax: the SAX handler block or NULL
12599 * @input: an Input Buffer
12600 * @enc: the charset encoding if known
12601 *
12602 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012603 *
Owen Taylor3473f882001-02-23 17:55:21 +000012604 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012605 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012606 */
12607
12608xmlDtdPtr
12609xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12610 xmlCharEncoding enc) {
12611 xmlDtdPtr ret = NULL;
12612 xmlParserCtxtPtr ctxt;
12613 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012614 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012615
12616 if (input == NULL)
12617 return(NULL);
12618
12619 ctxt = xmlNewParserCtxt();
12620 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012621 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012622 return(NULL);
12623 }
12624
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012625 /* We are loading a DTD */
12626 ctxt->options |= XML_PARSE_DTDLOAD;
12627
Owen Taylor3473f882001-02-23 17:55:21 +000012628 /*
12629 * Set-up the SAX context
12630 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012631 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012632 if (ctxt->sax != NULL)
12633 xmlFree(ctxt->sax);
12634 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012635 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012636 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012637 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012638
12639 /*
12640 * generate a parser input from the I/O handler
12641 */
12642
Daniel Veillard43caefb2003-12-07 19:32:22 +000012643 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012644 if (pinput == NULL) {
12645 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012646 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012647 xmlFreeParserCtxt(ctxt);
12648 return(NULL);
12649 }
12650
12651 /*
12652 * plug some encoding conversion routines here.
12653 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012654 if (xmlPushInput(ctxt, pinput) < 0) {
12655 if (sax != NULL) ctxt->sax = NULL;
12656 xmlFreeParserCtxt(ctxt);
12657 return(NULL);
12658 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012659 if (enc != XML_CHAR_ENCODING_NONE) {
12660 xmlSwitchEncoding(ctxt, enc);
12661 }
Owen Taylor3473f882001-02-23 17:55:21 +000012662
12663 pinput->filename = NULL;
12664 pinput->line = 1;
12665 pinput->col = 1;
12666 pinput->base = ctxt->input->cur;
12667 pinput->cur = ctxt->input->cur;
12668 pinput->free = NULL;
12669
12670 /*
12671 * let's parse that entity knowing it's an external subset.
12672 */
12673 ctxt->inSubset = 2;
12674 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012675 if (ctxt->myDoc == NULL) {
12676 xmlErrMemory(ctxt, "New Doc failed");
12677 return(NULL);
12678 }
12679 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012680 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12681 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012682
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012683 if ((enc == XML_CHAR_ENCODING_NONE) &&
12684 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012685 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012686 * Get the 4 first bytes and decode the charset
12687 * if enc != XML_CHAR_ENCODING_NONE
12688 * plug some encoding conversion routines.
12689 */
12690 start[0] = RAW;
12691 start[1] = NXT(1);
12692 start[2] = NXT(2);
12693 start[3] = NXT(3);
12694 enc = xmlDetectCharEncoding(start, 4);
12695 if (enc != XML_CHAR_ENCODING_NONE) {
12696 xmlSwitchEncoding(ctxt, enc);
12697 }
12698 }
12699
Owen Taylor3473f882001-02-23 17:55:21 +000012700 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12701
12702 if (ctxt->myDoc != NULL) {
12703 if (ctxt->wellFormed) {
12704 ret = ctxt->myDoc->extSubset;
12705 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012706 if (ret != NULL) {
12707 xmlNodePtr tmp;
12708
12709 ret->doc = NULL;
12710 tmp = ret->children;
12711 while (tmp != NULL) {
12712 tmp->doc = NULL;
12713 tmp = tmp->next;
12714 }
12715 }
Owen Taylor3473f882001-02-23 17:55:21 +000012716 } else {
12717 ret = NULL;
12718 }
12719 xmlFreeDoc(ctxt->myDoc);
12720 ctxt->myDoc = NULL;
12721 }
12722 if (sax != NULL) ctxt->sax = NULL;
12723 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012724
Owen Taylor3473f882001-02-23 17:55:21 +000012725 return(ret);
12726}
12727
12728/**
12729 * xmlSAXParseDTD:
12730 * @sax: the SAX handler block
12731 * @ExternalID: a NAME* containing the External ID of the DTD
12732 * @SystemID: a NAME* containing the URL to the DTD
12733 *
12734 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012735 *
Owen Taylor3473f882001-02-23 17:55:21 +000012736 * Returns the resulting xmlDtdPtr or NULL in case of error.
12737 */
12738
12739xmlDtdPtr
12740xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12741 const xmlChar *SystemID) {
12742 xmlDtdPtr ret = NULL;
12743 xmlParserCtxtPtr ctxt;
12744 xmlParserInputPtr input = NULL;
12745 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012746 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012747
12748 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12749
12750 ctxt = xmlNewParserCtxt();
12751 if (ctxt == NULL) {
12752 return(NULL);
12753 }
12754
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012755 /* We are loading a DTD */
12756 ctxt->options |= XML_PARSE_DTDLOAD;
12757
Owen Taylor3473f882001-02-23 17:55:21 +000012758 /*
12759 * Set-up the SAX context
12760 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012761 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012762 if (ctxt->sax != NULL)
12763 xmlFree(ctxt->sax);
12764 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012765 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012766 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012767
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012768 /*
12769 * Canonicalise the system ID
12770 */
12771 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012772 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012773 xmlFreeParserCtxt(ctxt);
12774 return(NULL);
12775 }
Owen Taylor3473f882001-02-23 17:55:21 +000012776
12777 /*
12778 * Ask the Entity resolver to load the damn thing
12779 */
12780
12781 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012782 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12783 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012784 if (input == NULL) {
12785 if (sax != NULL) ctxt->sax = NULL;
12786 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012787 if (systemIdCanonic != NULL)
12788 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012789 return(NULL);
12790 }
12791
12792 /*
12793 * plug some encoding conversion routines here.
12794 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012795 if (xmlPushInput(ctxt, input) < 0) {
12796 if (sax != NULL) ctxt->sax = NULL;
12797 xmlFreeParserCtxt(ctxt);
12798 if (systemIdCanonic != NULL)
12799 xmlFree(systemIdCanonic);
12800 return(NULL);
12801 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012802 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12803 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12804 xmlSwitchEncoding(ctxt, enc);
12805 }
Owen Taylor3473f882001-02-23 17:55:21 +000012806
12807 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012808 input->filename = (char *) systemIdCanonic;
12809 else
12810 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012811 input->line = 1;
12812 input->col = 1;
12813 input->base = ctxt->input->cur;
12814 input->cur = ctxt->input->cur;
12815 input->free = NULL;
12816
12817 /*
12818 * let's parse that entity knowing it's an external subset.
12819 */
12820 ctxt->inSubset = 2;
12821 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012822 if (ctxt->myDoc == NULL) {
12823 xmlErrMemory(ctxt, "New Doc failed");
12824 if (sax != NULL) ctxt->sax = NULL;
12825 xmlFreeParserCtxt(ctxt);
12826 return(NULL);
12827 }
12828 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012829 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12830 ExternalID, SystemID);
12831 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12832
12833 if (ctxt->myDoc != NULL) {
12834 if (ctxt->wellFormed) {
12835 ret = ctxt->myDoc->extSubset;
12836 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012837 if (ret != NULL) {
12838 xmlNodePtr tmp;
12839
12840 ret->doc = NULL;
12841 tmp = ret->children;
12842 while (tmp != NULL) {
12843 tmp->doc = NULL;
12844 tmp = tmp->next;
12845 }
12846 }
Owen Taylor3473f882001-02-23 17:55:21 +000012847 } else {
12848 ret = NULL;
12849 }
12850 xmlFreeDoc(ctxt->myDoc);
12851 ctxt->myDoc = NULL;
12852 }
12853 if (sax != NULL) ctxt->sax = NULL;
12854 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012855
Owen Taylor3473f882001-02-23 17:55:21 +000012856 return(ret);
12857}
12858
Daniel Veillard4432df22003-09-28 18:58:27 +000012859
Owen Taylor3473f882001-02-23 17:55:21 +000012860/**
12861 * xmlParseDTD:
12862 * @ExternalID: a NAME* containing the External ID of the DTD
12863 * @SystemID: a NAME* containing the URL to the DTD
12864 *
12865 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012866 *
Owen Taylor3473f882001-02-23 17:55:21 +000012867 * Returns the resulting xmlDtdPtr or NULL in case of error.
12868 */
12869
12870xmlDtdPtr
12871xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12872 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12873}
Daniel Veillard4432df22003-09-28 18:58:27 +000012874#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012875
12876/************************************************************************
12877 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012878 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012879 * *
12880 ************************************************************************/
12881
12882/**
Owen Taylor3473f882001-02-23 17:55:21 +000012883 * xmlParseCtxtExternalEntity:
12884 * @ctx: the existing parsing context
12885 * @URL: the URL for the entity to load
12886 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012887 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012888 *
12889 * Parse an external general entity within an existing parsing context
12890 * An external general parsed entity is well-formed if it matches the
12891 * production labeled extParsedEnt.
12892 *
12893 * [78] extParsedEnt ::= TextDecl? content
12894 *
12895 * Returns 0 if the entity is well formed, -1 in case of args problem and
12896 * the parser error code otherwise
12897 */
12898
12899int
12900xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012901 const xmlChar *ID, xmlNodePtr *lst) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012902 void *userData;
Owen Taylor3473f882001-02-23 17:55:21 +000012903
Daniel Veillardce682bc2004-11-05 17:22:25 +000012904 if (ctx == NULL) return(-1);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012905 /*
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012906 * If the user provided their own SAX callbacks, then reuse the
12907 * userData callback field, otherwise the expected setup in a
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012908 * DOM builder is to have userData == ctxt
12909 */
12910 if (ctx->userData == ctx)
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012911 userData = NULL;
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012912 else
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012913 userData = ctx->userData;
12914 return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12915 userData, ctx->depth + 1,
12916 URL, ID, lst);
Owen Taylor3473f882001-02-23 17:55:21 +000012917}
12918
12919/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012920 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012921 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012922 * @oldctxt: the previous parser context if available
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012923 * @sax: the SAX handler block (possibly NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000012924 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12925 * @depth: Used for loop detection, use 0
12926 * @URL: the URL for the entity to load
12927 * @ID: the System ID for the entity to load
12928 * @list: the return value for the set of parsed nodes
12929 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012930 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012931 *
12932 * Returns 0 if the entity is well formed, -1 in case of args problem and
12933 * the parser error code otherwise
12934 */
12935
Daniel Veillard7d515752003-09-26 19:12:37 +000012936static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012937xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12938 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012939 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012940 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012941 xmlParserCtxtPtr ctxt;
12942 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012943 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012944 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012945 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012946 xmlChar start[4];
12947 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012948
Daniel Veillard0161e632008-08-28 15:36:32 +000012949 if (((depth > 40) &&
12950 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12951 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012952 return(XML_ERR_ENTITY_LOOP);
12953 }
12954
Owen Taylor3473f882001-02-23 17:55:21 +000012955 if (list != NULL)
12956 *list = NULL;
12957 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012958 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012959 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012960 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012961
12962
Rob Richards9c0aa472009-03-26 18:10:19 +000012963 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012964 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012965 ctxt->userData = ctxt;
12966 if (sax != NULL) {
12967 oldsax = ctxt->sax;
12968 ctxt->sax = sax;
12969 if (user_data != NULL)
12970 ctxt->userData = user_data;
12971 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012972 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012973 newDoc = xmlNewDoc(BAD_CAST "1.0");
12974 if (newDoc == NULL) {
12975 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012976 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012977 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012978 newDoc->properties = XML_DOC_INTERNAL;
Haibo Huangcfd91dc2020-07-30 23:01:33 -070012979 if (doc) {
12980 newDoc->intSubset = doc->intSubset;
12981 newDoc->extSubset = doc->extSubset;
12982 if (doc->dict) {
12983 newDoc->dict = doc->dict;
12984 xmlDictReference(newDoc->dict);
12985 }
12986 if (doc->URL != NULL) {
12987 newDoc->URL = xmlStrdup(doc->URL);
12988 }
Owen Taylor3473f882001-02-23 17:55:21 +000012989 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012990 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12991 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012992 if (sax != NULL)
12993 ctxt->sax = oldsax;
12994 xmlFreeParserCtxt(ctxt);
12995 newDoc->intSubset = NULL;
12996 newDoc->extSubset = NULL;
12997 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012998 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012999 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013000 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013001 nodePush(ctxt, newDoc->children);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013002 if (doc == NULL) {
13003 ctxt->myDoc = newDoc;
13004 } else {
13005 ctxt->myDoc = doc;
13006 newRoot->doc = doc;
13007 }
Owen Taylor3473f882001-02-23 17:55:21 +000013008
Daniel Veillard0161e632008-08-28 15:36:32 +000013009 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013010 * Get the 4 first bytes and decode the charset
13011 * if enc != XML_CHAR_ENCODING_NONE
13012 * plug some encoding conversion routines.
13013 */
13014 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013015 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13016 start[0] = RAW;
13017 start[1] = NXT(1);
13018 start[2] = NXT(2);
13019 start[3] = NXT(3);
13020 enc = xmlDetectCharEncoding(start, 4);
13021 if (enc != XML_CHAR_ENCODING_NONE) {
13022 xmlSwitchEncoding(ctxt, enc);
13023 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013024 }
13025
Owen Taylor3473f882001-02-23 17:55:21 +000013026 /*
13027 * Parse a possible text declaration first
13028 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013029 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013030 xmlParseTextDecl(ctxt);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013031 /*
13032 * An XML-1.0 document can't reference an entity not XML-1.0
13033 */
13034 if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13035 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13036 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13037 "Version mismatch between document and entity\n");
13038 }
Owen Taylor3473f882001-02-23 17:55:21 +000013039 }
13040
Owen Taylor3473f882001-02-23 17:55:21 +000013041 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013042 ctxt->depth = depth;
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013043 if (oldctxt != NULL) {
13044 ctxt->_private = oldctxt->_private;
13045 ctxt->loadsubset = oldctxt->loadsubset;
13046 ctxt->validate = oldctxt->validate;
13047 ctxt->valid = oldctxt->valid;
13048 ctxt->replaceEntities = oldctxt->replaceEntities;
13049 if (oldctxt->validate) {
13050 ctxt->vctxt.error = oldctxt->vctxt.error;
13051 ctxt->vctxt.warning = oldctxt->vctxt.warning;
13052 ctxt->vctxt.userData = oldctxt->vctxt.userData;
13053 }
13054 ctxt->external = oldctxt->external;
13055 if (ctxt->dict) xmlDictFree(ctxt->dict);
13056 ctxt->dict = oldctxt->dict;
13057 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13058 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13059 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13060 ctxt->dictNames = oldctxt->dictNames;
13061 ctxt->attsDefault = oldctxt->attsDefault;
13062 ctxt->attsSpecial = oldctxt->attsSpecial;
13063 ctxt->linenumbers = oldctxt->linenumbers;
13064 ctxt->record_info = oldctxt->record_info;
13065 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13066 ctxt->node_seq.length = oldctxt->node_seq.length;
13067 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13068 } else {
13069 /*
13070 * Doing validity checking on chunk without context
13071 * doesn't make sense
13072 */
13073 ctxt->_private = NULL;
13074 ctxt->validate = 0;
13075 ctxt->external = 2;
13076 ctxt->loadsubset = 0;
13077 }
Owen Taylor3473f882001-02-23 17:55:21 +000013078
13079 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013080
Daniel Veillard561b7f82002-03-20 21:55:57 +000013081 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013082 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013083 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013084 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013085 }
13086 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013087 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013088 }
13089
13090 if (!ctxt->wellFormed) {
13091 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013092 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013093 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013094 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013095 } else {
13096 if (list != NULL) {
13097 xmlNodePtr cur;
13098
13099 /*
13100 * Return the newly created nodeset after unlinking it from
13101 * they pseudo parent.
13102 */
13103 cur = newDoc->children->children;
13104 *list = cur;
13105 while (cur != NULL) {
13106 cur->parent = NULL;
13107 cur = cur->next;
13108 }
13109 newDoc->children->children = NULL;
13110 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013111 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013112 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013113
13114 /*
13115 * Record in the parent context the number of entities replacement
13116 * done when parsing that reference.
13117 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013118 if (oldctxt != NULL)
13119 oldctxt->nbentities += ctxt->nbentities;
13120
Daniel Veillard0161e632008-08-28 15:36:32 +000013121 /*
13122 * Also record the size of the entity parsed
13123 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013124 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013125 oldctxt->sizeentities += ctxt->input->consumed;
13126 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13127 }
13128 /*
13129 * And record the last error if any
13130 */
Nick Wellnhofer3eef3f32017-06-20 16:13:57 +020013131 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
Daniel Veillard0161e632008-08-28 15:36:32 +000013132 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13133
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013134 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013135 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013136 if (oldctxt != NULL) {
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013137 ctxt->dict = NULL;
13138 ctxt->attsDefault = NULL;
13139 ctxt->attsSpecial = NULL;
13140 oldctxt->validate = ctxt->validate;
13141 oldctxt->valid = ctxt->valid;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013142 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13143 oldctxt->node_seq.length = ctxt->node_seq.length;
13144 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13145 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013146 ctxt->node_seq.maximum = 0;
13147 ctxt->node_seq.length = 0;
13148 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013149 xmlFreeParserCtxt(ctxt);
13150 newDoc->intSubset = NULL;
13151 newDoc->extSubset = NULL;
13152 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013153
Owen Taylor3473f882001-02-23 17:55:21 +000013154 return(ret);
13155}
13156
Daniel Veillard81273902003-09-30 00:43:48 +000013157#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013158/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013159 * xmlParseExternalEntity:
13160 * @doc: the document the chunk pertains to
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013161 * @sax: the SAX handler block (possibly NULL)
Daniel Veillard257d9102001-05-08 10:41:44 +000013162 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13163 * @depth: Used for loop detection, use 0
13164 * @URL: the URL for the entity to load
13165 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013166 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013167 *
13168 * Parse an external general entity
13169 * An external general parsed entity is well-formed if it matches the
13170 * production labeled extParsedEnt.
13171 *
13172 * [78] extParsedEnt ::= TextDecl? content
13173 *
13174 * Returns 0 if the entity is well formed, -1 in case of args problem and
13175 * the parser error code otherwise
13176 */
13177
13178int
13179xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013180 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013181 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013182 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013183}
13184
13185/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013186 * xmlParseBalancedChunkMemory:
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013187 * @doc: the document the chunk pertains to (must not be NULL)
13188 * @sax: the SAX handler block (possibly NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013189 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13190 * @depth: Used for loop detection, use 0
13191 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013192 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013193 *
13194 * Parse a well-balanced chunk of an XML document
13195 * called by the parser
13196 * The allowed sequence for the Well Balanced Chunk is the one defined by
13197 * the content production in the XML grammar:
13198 *
13199 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13200 *
13201 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13202 * the parser error code otherwise
13203 */
13204
13205int
13206xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013207 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013208 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13209 depth, string, lst, 0 );
13210}
Daniel Veillard81273902003-09-30 00:43:48 +000013211#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013212
13213/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013214 * xmlParseBalancedChunkMemoryInternal:
13215 * @oldctxt: the existing parsing context
13216 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13217 * @user_data: the user data field for the parser context
13218 * @lst: the return value for the set of parsed nodes
13219 *
13220 *
13221 * Parse a well-balanced chunk of an XML document
13222 * called by the parser
13223 * The allowed sequence for the Well Balanced Chunk is the one defined by
13224 * the content production in the XML grammar:
13225 *
13226 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13227 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013228 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13229 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013230 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013231 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013232 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013233 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013234static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013235xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13236 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13237 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013238 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013239 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013240 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013241 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013242 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013243 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013244 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013245#ifdef SAX2
13246 int i;
13247#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013248
Daniel Veillard0161e632008-08-28 15:36:32 +000013249 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13250 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013251 return(XML_ERR_ENTITY_LOOP);
13252 }
13253
13254
13255 if (lst != NULL)
13256 *lst = NULL;
13257 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013258 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013259
13260 size = xmlStrlen(string);
13261
13262 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013263 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013264 if (user_data != NULL)
13265 ctxt->userData = user_data;
13266 else
13267 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013268 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13269 ctxt->dict = oldctxt->dict;
Daniel Veillardad88b542017-12-08 09:42:31 +010013270 ctxt->input_id = oldctxt->input_id + 1;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013271 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13272 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13273 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013274
Daniel Veillard74eaec12009-08-26 15:57:20 +020013275#ifdef SAX2
13276 /* propagate namespaces down the entity */
13277 for (i = 0;i < oldctxt->nsNr;i += 2) {
13278 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13279 }
13280#endif
13281
Daniel Veillard328f48c2002-11-15 15:24:34 +000013282 oldsax = ctxt->sax;
13283 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013284 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013285 ctxt->replaceEntities = oldctxt->replaceEntities;
13286 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013287
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013288 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013289 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013290 newDoc = xmlNewDoc(BAD_CAST "1.0");
13291 if (newDoc == NULL) {
13292 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013293 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013294 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013295 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013296 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013297 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013298 newDoc->dict = ctxt->dict;
13299 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013300 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013301 } else {
13302 ctxt->myDoc = oldctxt->myDoc;
13303 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013304 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013305 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013306 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13307 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013308 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013309 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013310 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013311 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013312 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013313 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013314 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013315 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013316 ctxt->myDoc->children = NULL;
13317 ctxt->myDoc->last = NULL;
13318 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013319 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013320 ctxt->instate = XML_PARSER_CONTENT;
13321 ctxt->depth = oldctxt->depth + 1;
13322
Daniel Veillard328f48c2002-11-15 15:24:34 +000013323 ctxt->validate = 0;
13324 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013325 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13326 /*
13327 * ID/IDREF registration will be done in xmlValidateElement below
13328 */
13329 ctxt->loadsubset |= XML_SKIP_IDS;
13330 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013331 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013332 ctxt->attsDefault = oldctxt->attsDefault;
13333 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013334
Daniel Veillard68e9e742002-11-16 15:35:11 +000013335 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013336 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013337 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013338 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013339 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013340 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013341 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013342 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013343 }
13344
13345 if (!ctxt->wellFormed) {
13346 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013347 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013348 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013349 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013350 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013351 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013352 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013353
William M. Brack7b9154b2003-09-27 19:23:50 +000013354 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013355 xmlNodePtr cur;
13356
13357 /*
13358 * Return the newly created nodeset after unlinking it from
13359 * they pseudo parent.
13360 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013361 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013362 *lst = cur;
13363 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013364#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013365 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13366 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13367 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013368 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13369 oldctxt->myDoc, cur);
13370 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013371#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013372 cur->parent = NULL;
13373 cur = cur->next;
13374 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013375 ctxt->myDoc->children->children = NULL;
13376 }
13377 if (ctxt->myDoc != NULL) {
13378 xmlFreeNode(ctxt->myDoc->children);
13379 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013380 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013381 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013382
13383 /*
13384 * Record in the parent context the number of entities replacement
13385 * done when parsing that reference.
13386 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013387 if (oldctxt != NULL)
13388 oldctxt->nbentities += ctxt->nbentities;
13389
Daniel Veillard0161e632008-08-28 15:36:32 +000013390 /*
13391 * Also record the last error if any
13392 */
13393 if (ctxt->lastError.code != XML_ERR_OK)
13394 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13395
Daniel Veillard328f48c2002-11-15 15:24:34 +000013396 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013397 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013398 ctxt->attsDefault = NULL;
13399 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013400 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013401 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013402 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013403 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013404
Daniel Veillard328f48c2002-11-15 15:24:34 +000013405 return(ret);
13406}
13407
Daniel Veillard29b17482004-08-16 00:39:03 +000013408/**
13409 * xmlParseInNodeContext:
13410 * @node: the context node
13411 * @data: the input string
13412 * @datalen: the input string length in bytes
13413 * @options: a combination of xmlParserOption
13414 * @lst: the return value for the set of parsed nodes
13415 *
13416 * Parse a well-balanced chunk of an XML document
13417 * within the context (DTD, namespaces, etc ...) of the given node.
13418 *
13419 * The allowed sequence for the data is a Well Balanced Chunk defined by
13420 * the content production in the XML grammar:
13421 *
13422 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13423 *
13424 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13425 * error code otherwise
13426 */
13427xmlParserErrors
13428xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13429 int options, xmlNodePtr *lst) {
13430#ifdef SAX2
13431 xmlParserCtxtPtr ctxt;
13432 xmlDocPtr doc = NULL;
13433 xmlNodePtr fake, cur;
13434 int nsnr = 0;
13435
13436 xmlParserErrors ret = XML_ERR_OK;
13437
13438 /*
13439 * check all input parameters, grab the document
13440 */
13441 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13442 return(XML_ERR_INTERNAL_ERROR);
13443 switch (node->type) {
13444 case XML_ELEMENT_NODE:
13445 case XML_ATTRIBUTE_NODE:
13446 case XML_TEXT_NODE:
13447 case XML_CDATA_SECTION_NODE:
13448 case XML_ENTITY_REF_NODE:
13449 case XML_PI_NODE:
13450 case XML_COMMENT_NODE:
13451 case XML_DOCUMENT_NODE:
13452 case XML_HTML_DOCUMENT_NODE:
13453 break;
13454 default:
13455 return(XML_ERR_INTERNAL_ERROR);
13456
13457 }
13458 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13459 (node->type != XML_DOCUMENT_NODE) &&
13460 (node->type != XML_HTML_DOCUMENT_NODE))
13461 node = node->parent;
13462 if (node == NULL)
13463 return(XML_ERR_INTERNAL_ERROR);
13464 if (node->type == XML_ELEMENT_NODE)
13465 doc = node->doc;
13466 else
13467 doc = (xmlDocPtr) node;
13468 if (doc == NULL)
13469 return(XML_ERR_INTERNAL_ERROR);
13470
13471 /*
13472 * allocate a context and set-up everything not related to the
13473 * node position in the tree
13474 */
13475 if (doc->type == XML_DOCUMENT_NODE)
13476 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13477#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013478 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013479 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013480 /*
13481 * When parsing in context, it makes no sense to add implied
13482 * elements like html/body/etc...
13483 */
13484 options |= HTML_PARSE_NOIMPLIED;
13485 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013486#endif
13487 else
13488 return(XML_ERR_INTERNAL_ERROR);
13489
13490 if (ctxt == NULL)
13491 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013492
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013493 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013494 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13495 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13496 * we must wait until the last moment to free the original one.
13497 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013498 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013499 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013500 xmlDictFree(ctxt->dict);
13501 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013502 } else
13503 options |= XML_PARSE_NODICT;
13504
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013505 if (doc->encoding != NULL) {
13506 xmlCharEncodingHandlerPtr hdlr;
13507
13508 if (ctxt->encoding != NULL)
13509 xmlFree((xmlChar *) ctxt->encoding);
13510 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13511
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013512 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013513 if (hdlr != NULL) {
13514 xmlSwitchToEncoding(ctxt, hdlr);
13515 } else {
13516 return(XML_ERR_UNSUPPORTED_ENCODING);
13517 }
13518 }
13519
Daniel Veillard37334572008-07-31 08:20:02 +000013520 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013521 xmlDetectSAX2(ctxt);
13522 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013523 /* parsing in context, i.e. as within existing content */
Daniel Veillardad88b542017-12-08 09:42:31 +010013524 ctxt->input_id = 2;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013525 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013526
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013527 fake = xmlNewComment(NULL);
13528 if (fake == NULL) {
13529 xmlFreeParserCtxt(ctxt);
13530 return(XML_ERR_NO_MEMORY);
13531 }
13532 xmlAddChild(node, fake);
13533
Daniel Veillard29b17482004-08-16 00:39:03 +000013534 if (node->type == XML_ELEMENT_NODE) {
13535 nodePush(ctxt, node);
13536 /*
13537 * initialize the SAX2 namespaces stack
13538 */
13539 cur = node;
13540 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13541 xmlNsPtr ns = cur->nsDef;
13542 const xmlChar *iprefix, *ihref;
13543
13544 while (ns != NULL) {
13545 if (ctxt->dict) {
13546 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13547 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13548 } else {
13549 iprefix = ns->prefix;
13550 ihref = ns->href;
13551 }
13552
13553 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13554 nsPush(ctxt, iprefix, ihref);
13555 nsnr++;
13556 }
13557 ns = ns->next;
13558 }
13559 cur = cur->parent;
13560 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013561 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013562
13563 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13564 /*
13565 * ID/IDREF registration will be done in xmlValidateElement below
13566 */
13567 ctxt->loadsubset |= XML_SKIP_IDS;
13568 }
13569
Daniel Veillard499cc922006-01-18 17:22:35 +000013570#ifdef LIBXML_HTML_ENABLED
13571 if (doc->type == XML_HTML_DOCUMENT_NODE)
13572 __htmlParseContent(ctxt);
13573 else
13574#endif
13575 xmlParseContent(ctxt);
13576
Daniel Veillard29b17482004-08-16 00:39:03 +000013577 nsPop(ctxt, nsnr);
13578 if ((RAW == '<') && (NXT(1) == '/')) {
13579 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13580 } else if (RAW != 0) {
13581 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13582 }
13583 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13584 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13585 ctxt->wellFormed = 0;
13586 }
13587
13588 if (!ctxt->wellFormed) {
13589 if (ctxt->errNo == 0)
13590 ret = XML_ERR_INTERNAL_ERROR;
13591 else
13592 ret = (xmlParserErrors)ctxt->errNo;
13593 } else {
13594 ret = XML_ERR_OK;
13595 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013596
Daniel Veillard29b17482004-08-16 00:39:03 +000013597 /*
13598 * Return the newly created nodeset after unlinking it from
13599 * the pseudo sibling.
13600 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013601
Daniel Veillard29b17482004-08-16 00:39:03 +000013602 cur = fake->next;
13603 fake->next = NULL;
13604 node->last = fake;
13605
13606 if (cur != NULL) {
13607 cur->prev = NULL;
13608 }
13609
13610 *lst = cur;
13611
13612 while (cur != NULL) {
13613 cur->parent = NULL;
13614 cur = cur->next;
13615 }
13616
13617 xmlUnlinkNode(fake);
13618 xmlFreeNode(fake);
13619
13620
13621 if (ret != XML_ERR_OK) {
13622 xmlFreeNodeList(*lst);
13623 *lst = NULL;
13624 }
William M. Brackc3f81342004-10-03 01:22:44 +000013625
William M. Brackb7b54de2004-10-06 16:38:01 +000013626 if (doc->dict != NULL)
13627 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013628 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013629
Daniel Veillard29b17482004-08-16 00:39:03 +000013630 return(ret);
13631#else /* !SAX2 */
13632 return(XML_ERR_INTERNAL_ERROR);
13633#endif
13634}
13635
Daniel Veillard81273902003-09-30 00:43:48 +000013636#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013637/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013638 * xmlParseBalancedChunkMemoryRecover:
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013639 * @doc: the document the chunk pertains to (must not be NULL)
13640 * @sax: the SAX handler block (possibly NULL)
Daniel Veillard58e44c92002-08-02 22:19:49 +000013641 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13642 * @depth: Used for loop detection, use 0
13643 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13644 * @lst: the return value for the set of parsed nodes
13645 * @recover: return nodes even if the data is broken (use 0)
13646 *
13647 *
13648 * Parse a well-balanced chunk of an XML document
13649 * called by the parser
13650 * The allowed sequence for the Well Balanced Chunk is the one defined by
13651 * the content production in the XML grammar:
13652 *
13653 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13654 *
13655 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13656 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013657 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013658 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013659 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13660 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013661 */
13662int
13663xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013664 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013665 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013666 xmlParserCtxtPtr ctxt;
13667 xmlDocPtr newDoc;
13668 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013669 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013670 int size;
13671 int ret = 0;
13672
Daniel Veillard0161e632008-08-28 15:36:32 +000013673 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013674 return(XML_ERR_ENTITY_LOOP);
13675 }
13676
13677
Daniel Veillardcda96922001-08-21 10:56:31 +000013678 if (lst != NULL)
13679 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013680 if (string == NULL)
13681 return(-1);
13682
13683 size = xmlStrlen(string);
13684
13685 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13686 if (ctxt == NULL) return(-1);
13687 ctxt->userData = ctxt;
13688 if (sax != NULL) {
13689 oldsax = ctxt->sax;
13690 ctxt->sax = sax;
13691 if (user_data != NULL)
13692 ctxt->userData = user_data;
13693 }
13694 newDoc = xmlNewDoc(BAD_CAST "1.0");
13695 if (newDoc == NULL) {
13696 xmlFreeParserCtxt(ctxt);
13697 return(-1);
13698 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013699 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013700 if ((doc != NULL) && (doc->dict != NULL)) {
13701 xmlDictFree(ctxt->dict);
13702 ctxt->dict = doc->dict;
13703 xmlDictReference(ctxt->dict);
13704 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13705 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13706 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13707 ctxt->dictNames = 1;
13708 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013709 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013710 }
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013711 /* doc == NULL is only supported for historic reasons */
Owen Taylor3473f882001-02-23 17:55:21 +000013712 if (doc != NULL) {
13713 newDoc->intSubset = doc->intSubset;
13714 newDoc->extSubset = doc->extSubset;
13715 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013716 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13717 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013718 if (sax != NULL)
13719 ctxt->sax = oldsax;
13720 xmlFreeParserCtxt(ctxt);
13721 newDoc->intSubset = NULL;
13722 newDoc->extSubset = NULL;
13723 xmlFreeDoc(newDoc);
13724 return(-1);
13725 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013726 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13727 nodePush(ctxt, newRoot);
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013728 /* doc == NULL is only supported for historic reasons */
Owen Taylor3473f882001-02-23 17:55:21 +000013729 if (doc == NULL) {
13730 ctxt->myDoc = newDoc;
13731 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013732 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013733 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013734 /* Ensure that doc has XML spec namespace */
13735 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13736 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013737 }
13738 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardad88b542017-12-08 09:42:31 +010013739 ctxt->input_id = 2;
Owen Taylor3473f882001-02-23 17:55:21 +000013740 ctxt->depth = depth;
13741
13742 /*
13743 * Doing validity checking on chunk doesn't make sense
13744 */
13745 ctxt->validate = 0;
13746 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013747 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013748
Daniel Veillardb39bc392002-10-26 19:29:51 +000013749 if ( doc != NULL ){
13750 content = doc->children;
13751 doc->children = NULL;
13752 xmlParseContent(ctxt);
13753 doc->children = content;
13754 }
13755 else {
13756 xmlParseContent(ctxt);
13757 }
Owen Taylor3473f882001-02-23 17:55:21 +000013758 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013759 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013760 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013761 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013762 }
13763 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013764 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013765 }
13766
13767 if (!ctxt->wellFormed) {
13768 if (ctxt->errNo == 0)
13769 ret = 1;
13770 else
13771 ret = ctxt->errNo;
13772 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013773 ret = 0;
13774 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013775
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013776 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13777 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013778
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013779 /*
13780 * Return the newly created nodeset after unlinking it from
13781 * they pseudo parent.
13782 */
13783 cur = newDoc->children->children;
13784 *lst = cur;
13785 while (cur != NULL) {
13786 xmlSetTreeDoc(cur, doc);
13787 cur->parent = NULL;
13788 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013789 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013790 newDoc->children->children = NULL;
13791 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013792
13793 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013794 ctxt->sax = oldsax;
13795 xmlFreeParserCtxt(ctxt);
13796 newDoc->intSubset = NULL;
13797 newDoc->extSubset = NULL;
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013798 /* This leaks the namespace list if doc == NULL */
Rob Richardsa02f1992006-09-16 14:04:26 +000013799 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013800 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013801
Owen Taylor3473f882001-02-23 17:55:21 +000013802 return(ret);
13803}
13804
13805/**
13806 * xmlSAXParseEntity:
13807 * @sax: the SAX handler block
13808 * @filename: the filename
13809 *
13810 * parse an XML external entity out of context and build a tree.
13811 * It use the given SAX function block to handle the parsing callback.
13812 * If sax is NULL, fallback to the default DOM tree building routines.
13813 *
13814 * [78] extParsedEnt ::= TextDecl? content
13815 *
13816 * This correspond to a "Well Balanced" chunk
13817 *
13818 * Returns the resulting document tree
13819 */
13820
13821xmlDocPtr
13822xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13823 xmlDocPtr ret;
13824 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013825
13826 ctxt = xmlCreateFileParserCtxt(filename);
13827 if (ctxt == NULL) {
13828 return(NULL);
13829 }
13830 if (sax != NULL) {
13831 if (ctxt->sax != NULL)
13832 xmlFree(ctxt->sax);
13833 ctxt->sax = sax;
13834 ctxt->userData = NULL;
13835 }
13836
Owen Taylor3473f882001-02-23 17:55:21 +000013837 xmlParseExtParsedEnt(ctxt);
13838
13839 if (ctxt->wellFormed)
13840 ret = ctxt->myDoc;
13841 else {
13842 ret = NULL;
13843 xmlFreeDoc(ctxt->myDoc);
13844 ctxt->myDoc = NULL;
13845 }
13846 if (sax != NULL)
13847 ctxt->sax = NULL;
13848 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013849
Owen Taylor3473f882001-02-23 17:55:21 +000013850 return(ret);
13851}
13852
13853/**
13854 * xmlParseEntity:
13855 * @filename: the filename
13856 *
13857 * parse an XML external entity out of context and build a tree.
13858 *
13859 * [78] extParsedEnt ::= TextDecl? content
13860 *
13861 * This correspond to a "Well Balanced" chunk
13862 *
13863 * Returns the resulting document tree
13864 */
13865
13866xmlDocPtr
13867xmlParseEntity(const char *filename) {
13868 return(xmlSAXParseEntity(NULL, filename));
13869}
Daniel Veillard81273902003-09-30 00:43:48 +000013870#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013871
13872/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013873 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013874 * @URL: the entity URL
13875 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013876 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013877 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013878 *
13879 * Create a parser context for an external entity
13880 * Automatic support for ZLIB/Compress compressed document is provided
13881 * by default if found at compile-time.
13882 *
13883 * Returns the new parser context or NULL
13884 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013885static xmlParserCtxtPtr
13886xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13887 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013888 xmlParserCtxtPtr ctxt;
13889 xmlParserInputPtr inputStream;
13890 char *directory = NULL;
13891 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013892
Owen Taylor3473f882001-02-23 17:55:21 +000013893 ctxt = xmlNewParserCtxt();
13894 if (ctxt == NULL) {
13895 return(NULL);
13896 }
13897
Daniel Veillard48247b42009-07-10 16:12:46 +020013898 if (pctx != NULL) {
13899 ctxt->options = pctx->options;
13900 ctxt->_private = pctx->_private;
Daniel Veillardad88b542017-12-08 09:42:31 +010013901 /*
13902 * this is a subparser of pctx, so the input_id should be
13903 * incremented to distinguish from main entity
13904 */
13905 ctxt->input_id = pctx->input_id + 1;
Rob Richards9c0aa472009-03-26 18:10:19 +000013906 }
13907
Haibo Huangcfd91dc2020-07-30 23:01:33 -070013908 /* Don't read from stdin. */
13909 if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13910 URL = BAD_CAST "./-";
13911
Owen Taylor3473f882001-02-23 17:55:21 +000013912 uri = xmlBuildURI(URL, base);
13913
13914 if (uri == NULL) {
13915 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13916 if (inputStream == NULL) {
13917 xmlFreeParserCtxt(ctxt);
13918 return(NULL);
13919 }
13920
13921 inputPush(ctxt, inputStream);
13922
13923 if ((ctxt->directory == NULL) && (directory == NULL))
13924 directory = xmlParserGetDirectory((char *)URL);
13925 if ((ctxt->directory == NULL) && (directory != NULL))
13926 ctxt->directory = directory;
13927 } else {
13928 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13929 if (inputStream == NULL) {
13930 xmlFree(uri);
13931 xmlFreeParserCtxt(ctxt);
13932 return(NULL);
13933 }
13934
13935 inputPush(ctxt, inputStream);
13936
13937 if ((ctxt->directory == NULL) && (directory == NULL))
13938 directory = xmlParserGetDirectory((char *)uri);
13939 if ((ctxt->directory == NULL) && (directory != NULL))
13940 ctxt->directory = directory;
13941 xmlFree(uri);
13942 }
Owen Taylor3473f882001-02-23 17:55:21 +000013943 return(ctxt);
13944}
13945
Rob Richards9c0aa472009-03-26 18:10:19 +000013946/**
13947 * xmlCreateEntityParserCtxt:
13948 * @URL: the entity URL
13949 * @ID: the entity PUBLIC ID
13950 * @base: a possible base for the target URI
13951 *
13952 * Create a parser context for an external entity
13953 * Automatic support for ZLIB/Compress compressed document is provided
13954 * by default if found at compile-time.
13955 *
13956 * Returns the new parser context or NULL
13957 */
13958xmlParserCtxtPtr
13959xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13960 const xmlChar *base) {
13961 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13962
13963}
13964
Owen Taylor3473f882001-02-23 17:55:21 +000013965/************************************************************************
13966 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013967 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013968 * *
13969 ************************************************************************/
13970
13971/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013972 * xmlCreateURLParserCtxt:
13973 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013974 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013975 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013976 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013977 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013978 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013979 *
13980 * Returns the new parser context or NULL
13981 */
13982xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013983xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013984{
13985 xmlParserCtxtPtr ctxt;
13986 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013987 char *directory = NULL;
13988
Owen Taylor3473f882001-02-23 17:55:21 +000013989 ctxt = xmlNewParserCtxt();
13990 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013991 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013992 return(NULL);
13993 }
13994
Daniel Veillarddf292f72005-01-16 19:00:15 +000013995 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013996 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013997 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013998
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013999 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014000 if (inputStream == NULL) {
14001 xmlFreeParserCtxt(ctxt);
14002 return(NULL);
14003 }
14004
Owen Taylor3473f882001-02-23 17:55:21 +000014005 inputPush(ctxt, inputStream);
14006 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014007 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014008 if ((ctxt->directory == NULL) && (directory != NULL))
14009 ctxt->directory = directory;
14010
14011 return(ctxt);
14012}
14013
Daniel Veillard61b93382003-11-03 14:28:31 +000014014/**
14015 * xmlCreateFileParserCtxt:
14016 * @filename: the filename
14017 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014018 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014019 * Automatic support for ZLIB/Compress compressed document is provided
14020 * by default if found at compile-time.
14021 *
14022 * Returns the new parser context or NULL
14023 */
14024xmlParserCtxtPtr
14025xmlCreateFileParserCtxt(const char *filename)
14026{
14027 return(xmlCreateURLParserCtxt(filename, 0));
14028}
14029
Daniel Veillard81273902003-09-30 00:43:48 +000014030#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014031/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014032 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014033 * @sax: the SAX handler block
14034 * @filename: the filename
14035 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14036 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014037 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014038 *
14039 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14040 * compressed document is provided by default if found at compile-time.
14041 * It use the given SAX function block to handle the parsing callback.
14042 * If sax is NULL, fallback to the default DOM tree building routines.
14043 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014044 * User data (void *) is stored within the parser context in the
14045 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014046 *
Owen Taylor3473f882001-02-23 17:55:21 +000014047 * Returns the resulting document tree
14048 */
14049
14050xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014051xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14052 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014053 xmlDocPtr ret;
14054 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014055
Daniel Veillard635ef722001-10-29 11:48:19 +000014056 xmlInitParser();
14057
Owen Taylor3473f882001-02-23 17:55:21 +000014058 ctxt = xmlCreateFileParserCtxt(filename);
14059 if (ctxt == NULL) {
14060 return(NULL);
14061 }
14062 if (sax != NULL) {
14063 if (ctxt->sax != NULL)
14064 xmlFree(ctxt->sax);
14065 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014066 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014067 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014068 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014069 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014070 }
Owen Taylor3473f882001-02-23 17:55:21 +000014071
Daniel Veillard37d2d162008-03-14 10:54:00 +000014072 if (ctxt->directory == NULL)
14073 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014074
Daniel Veillarddad3f682002-11-17 16:47:27 +000014075 ctxt->recovery = recovery;
14076
Owen Taylor3473f882001-02-23 17:55:21 +000014077 xmlParseDocument(ctxt);
14078
William M. Brackc07329e2003-09-08 01:57:30 +000014079 if ((ctxt->wellFormed) || recovery) {
14080 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014081 if (ret != NULL) {
14082 if (ctxt->input->buf->compressed > 0)
14083 ret->compression = 9;
14084 else
14085 ret->compression = ctxt->input->buf->compressed;
14086 }
William M. Brackc07329e2003-09-08 01:57:30 +000014087 }
Owen Taylor3473f882001-02-23 17:55:21 +000014088 else {
14089 ret = NULL;
14090 xmlFreeDoc(ctxt->myDoc);
14091 ctxt->myDoc = NULL;
14092 }
14093 if (sax != NULL)
14094 ctxt->sax = NULL;
14095 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014096
Owen Taylor3473f882001-02-23 17:55:21 +000014097 return(ret);
14098}
14099
14100/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014101 * xmlSAXParseFile:
14102 * @sax: the SAX handler block
14103 * @filename: the filename
14104 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14105 * documents
14106 *
14107 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14108 * compressed document is provided by default if found at compile-time.
14109 * It use the given SAX function block to handle the parsing callback.
14110 * If sax is NULL, fallback to the default DOM tree building routines.
14111 *
14112 * Returns the resulting document tree
14113 */
14114
14115xmlDocPtr
14116xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14117 int recovery) {
14118 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14119}
14120
14121/**
Owen Taylor3473f882001-02-23 17:55:21 +000014122 * xmlRecoverDoc:
14123 * @cur: a pointer to an array of xmlChar
14124 *
14125 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014126 * In the case the document is not Well Formed, a attempt to build a
14127 * tree is tried anyway
14128 *
14129 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014130 */
14131
14132xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014133xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014134 return(xmlSAXParseDoc(NULL, cur, 1));
14135}
14136
14137/**
14138 * xmlParseFile:
14139 * @filename: the filename
14140 *
14141 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14142 * compressed document is provided by default if found at compile-time.
14143 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014144 * Returns the resulting document tree if the file was wellformed,
14145 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014146 */
14147
14148xmlDocPtr
14149xmlParseFile(const char *filename) {
14150 return(xmlSAXParseFile(NULL, filename, 0));
14151}
14152
14153/**
14154 * xmlRecoverFile:
14155 * @filename: the filename
14156 *
14157 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14158 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014159 * In the case the document is not Well Formed, it attempts to build
14160 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014161 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014162 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014163 */
14164
14165xmlDocPtr
14166xmlRecoverFile(const char *filename) {
14167 return(xmlSAXParseFile(NULL, filename, 1));
14168}
14169
14170
14171/**
14172 * xmlSetupParserForBuffer:
14173 * @ctxt: an XML parser context
14174 * @buffer: a xmlChar * buffer
14175 * @filename: a file name
14176 *
14177 * Setup the parser context to parse a new buffer; Clears any prior
14178 * contents from the parser context. The buffer parameter must not be
14179 * NULL, but the filename parameter can be
14180 */
14181void
14182xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14183 const char* filename)
14184{
14185 xmlParserInputPtr input;
14186
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014187 if ((ctxt == NULL) || (buffer == NULL))
14188 return;
14189
Owen Taylor3473f882001-02-23 17:55:21 +000014190 input = xmlNewInputStream(ctxt);
14191 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014192 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014193 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014194 return;
14195 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014196
Owen Taylor3473f882001-02-23 17:55:21 +000014197 xmlClearParserCtxt(ctxt);
14198 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014199 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014200 input->base = buffer;
14201 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014202 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014203 inputPush(ctxt, input);
14204}
14205
14206/**
14207 * xmlSAXUserParseFile:
14208 * @sax: a SAX handler
14209 * @user_data: The user data returned on SAX callbacks
14210 * @filename: a file name
14211 *
14212 * parse an XML file and call the given SAX handler routines.
14213 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014214 *
Owen Taylor3473f882001-02-23 17:55:21 +000014215 * Returns 0 in case of success or a error number otherwise
14216 */
14217int
14218xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14219 const char *filename) {
14220 int ret = 0;
14221 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014222
Owen Taylor3473f882001-02-23 17:55:21 +000014223 ctxt = xmlCreateFileParserCtxt(filename);
14224 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014225 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014226 xmlFree(ctxt->sax);
14227 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014228 xmlDetectSAX2(ctxt);
14229
Owen Taylor3473f882001-02-23 17:55:21 +000014230 if (user_data != NULL)
14231 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014232
Owen Taylor3473f882001-02-23 17:55:21 +000014233 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014234
Owen Taylor3473f882001-02-23 17:55:21 +000014235 if (ctxt->wellFormed)
14236 ret = 0;
14237 else {
14238 if (ctxt->errNo != 0)
14239 ret = ctxt->errNo;
14240 else
14241 ret = -1;
14242 }
14243 if (sax != NULL)
14244 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014245 if (ctxt->myDoc != NULL) {
14246 xmlFreeDoc(ctxt->myDoc);
14247 ctxt->myDoc = NULL;
14248 }
Owen Taylor3473f882001-02-23 17:55:21 +000014249 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014250
Owen Taylor3473f882001-02-23 17:55:21 +000014251 return ret;
14252}
Daniel Veillard81273902003-09-30 00:43:48 +000014253#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014254
14255/************************************************************************
14256 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014257 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014258 * *
14259 ************************************************************************/
14260
14261/**
14262 * xmlCreateMemoryParserCtxt:
14263 * @buffer: a pointer to a char array
14264 * @size: the size of the array
14265 *
14266 * Create a parser context for an XML in-memory document.
14267 *
14268 * Returns the new parser context or NULL
14269 */
14270xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014271xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014272 xmlParserCtxtPtr ctxt;
14273 xmlParserInputPtr input;
14274 xmlParserInputBufferPtr buf;
14275
14276 if (buffer == NULL)
14277 return(NULL);
14278 if (size <= 0)
14279 return(NULL);
14280
14281 ctxt = xmlNewParserCtxt();
14282 if (ctxt == NULL)
14283 return(NULL);
14284
Daniel Veillard53350552003-09-18 13:35:51 +000014285 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014286 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014287 if (buf == NULL) {
14288 xmlFreeParserCtxt(ctxt);
14289 return(NULL);
14290 }
Owen Taylor3473f882001-02-23 17:55:21 +000014291
14292 input = xmlNewInputStream(ctxt);
14293 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014294 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014295 xmlFreeParserCtxt(ctxt);
14296 return(NULL);
14297 }
14298
14299 input->filename = NULL;
14300 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014301 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014302
14303 inputPush(ctxt, input);
14304 return(ctxt);
14305}
14306
Daniel Veillard81273902003-09-30 00:43:48 +000014307#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014308/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014309 * xmlSAXParseMemoryWithData:
14310 * @sax: the SAX handler block
14311 * @buffer: an pointer to a char array
14312 * @size: the size of the array
14313 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14314 * documents
14315 * @data: the userdata
14316 *
14317 * parse an XML in-memory block and use the given SAX function block
14318 * to handle the parsing callback. If sax is NULL, fallback to the default
14319 * DOM tree building routines.
14320 *
14321 * User data (void *) is stored within the parser context in the
14322 * context's _private member, so it is available nearly everywhere in libxml
14323 *
14324 * Returns the resulting document tree
14325 */
14326
14327xmlDocPtr
14328xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14329 int size, int recovery, void *data) {
14330 xmlDocPtr ret;
14331 xmlParserCtxtPtr ctxt;
14332
Daniel Veillardab2a7632009-07-09 08:45:03 +020014333 xmlInitParser();
14334
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014335 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14336 if (ctxt == NULL) return(NULL);
14337 if (sax != NULL) {
14338 if (ctxt->sax != NULL)
14339 xmlFree(ctxt->sax);
14340 ctxt->sax = sax;
14341 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014342 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014343 if (data!=NULL) {
14344 ctxt->_private=data;
14345 }
14346
Daniel Veillardadba5f12003-04-04 16:09:01 +000014347 ctxt->recovery = recovery;
14348
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014349 xmlParseDocument(ctxt);
14350
14351 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14352 else {
14353 ret = NULL;
14354 xmlFreeDoc(ctxt->myDoc);
14355 ctxt->myDoc = NULL;
14356 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014357 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014358 ctxt->sax = NULL;
14359 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014360
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014361 return(ret);
14362}
14363
14364/**
Owen Taylor3473f882001-02-23 17:55:21 +000014365 * xmlSAXParseMemory:
14366 * @sax: the SAX handler block
14367 * @buffer: an pointer to a char array
14368 * @size: the size of the array
14369 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14370 * documents
14371 *
14372 * parse an XML in-memory block and use the given SAX function block
14373 * to handle the parsing callback. If sax is NULL, fallback to the default
14374 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014375 *
Owen Taylor3473f882001-02-23 17:55:21 +000014376 * Returns the resulting document tree
14377 */
14378xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014379xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14380 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014381 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014382}
14383
14384/**
14385 * xmlParseMemory:
14386 * @buffer: an pointer to a char array
14387 * @size: the size of the array
14388 *
14389 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014390 *
Owen Taylor3473f882001-02-23 17:55:21 +000014391 * Returns the resulting document tree
14392 */
14393
Daniel Veillard50822cb2001-07-26 20:05:51 +000014394xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014395 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14396}
14397
14398/**
14399 * xmlRecoverMemory:
14400 * @buffer: an pointer to a char array
14401 * @size: the size of the array
14402 *
14403 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014404 * In the case the document is not Well Formed, an attempt to
14405 * build a tree is tried anyway
14406 *
14407 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014408 */
14409
Daniel Veillard50822cb2001-07-26 20:05:51 +000014410xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014411 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14412}
14413
14414/**
14415 * xmlSAXUserParseMemory:
14416 * @sax: a SAX handler
14417 * @user_data: The user data returned on SAX callbacks
14418 * @buffer: an in-memory XML document input
14419 * @size: the length of the XML document in bytes
14420 *
14421 * A better SAX parsing routine.
14422 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014423 *
Owen Taylor3473f882001-02-23 17:55:21 +000014424 * Returns 0 in case of success or a error number otherwise
14425 */
14426int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014427 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014428 int ret = 0;
14429 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014430
14431 xmlInitParser();
14432
Owen Taylor3473f882001-02-23 17:55:21 +000014433 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14434 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014435 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14436 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014437 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014438 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014439
Daniel Veillard30211a02001-04-26 09:33:18 +000014440 if (user_data != NULL)
14441 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014442
Owen Taylor3473f882001-02-23 17:55:21 +000014443 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014444
Owen Taylor3473f882001-02-23 17:55:21 +000014445 if (ctxt->wellFormed)
14446 ret = 0;
14447 else {
14448 if (ctxt->errNo != 0)
14449 ret = ctxt->errNo;
14450 else
14451 ret = -1;
14452 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014453 if (sax != NULL)
14454 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014455 if (ctxt->myDoc != NULL) {
14456 xmlFreeDoc(ctxt->myDoc);
14457 ctxt->myDoc = NULL;
14458 }
Owen Taylor3473f882001-02-23 17:55:21 +000014459 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014460
Owen Taylor3473f882001-02-23 17:55:21 +000014461 return ret;
14462}
Daniel Veillard81273902003-09-30 00:43:48 +000014463#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014464
14465/**
14466 * xmlCreateDocParserCtxt:
14467 * @cur: a pointer to an array of xmlChar
14468 *
14469 * Creates a parser context for an XML in-memory document.
14470 *
14471 * Returns the new parser context or NULL
14472 */
14473xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014474xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014475 int len;
14476
14477 if (cur == NULL)
14478 return(NULL);
14479 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014480 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014481}
14482
Daniel Veillard81273902003-09-30 00:43:48 +000014483#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014484/**
14485 * xmlSAXParseDoc:
14486 * @sax: the SAX handler block
14487 * @cur: a pointer to an array of xmlChar
14488 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14489 * documents
14490 *
14491 * parse an XML in-memory document and build a tree.
14492 * It use the given SAX function block to handle the parsing callback.
14493 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014494 *
Owen Taylor3473f882001-02-23 17:55:21 +000014495 * Returns the resulting document tree
14496 */
14497
14498xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014499xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014500 xmlDocPtr ret;
14501 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014502 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014503
Daniel Veillard38936062004-11-04 17:45:11 +000014504 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014505
14506
14507 ctxt = xmlCreateDocParserCtxt(cur);
14508 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014509 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014510 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014511 ctxt->sax = sax;
14512 ctxt->userData = NULL;
14513 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014514 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014515
14516 xmlParseDocument(ctxt);
14517 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14518 else {
14519 ret = NULL;
14520 xmlFreeDoc(ctxt->myDoc);
14521 ctxt->myDoc = NULL;
14522 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014523 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014524 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014525 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014526
Owen Taylor3473f882001-02-23 17:55:21 +000014527 return(ret);
14528}
14529
14530/**
14531 * xmlParseDoc:
14532 * @cur: a pointer to an array of xmlChar
14533 *
14534 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014535 *
Owen Taylor3473f882001-02-23 17:55:21 +000014536 * Returns the resulting document tree
14537 */
14538
14539xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014540xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014541 return(xmlSAXParseDoc(NULL, cur, 0));
14542}
Daniel Veillard81273902003-09-30 00:43:48 +000014543#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014544
Daniel Veillard81273902003-09-30 00:43:48 +000014545#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014546/************************************************************************
14547 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014548 * Specific function to keep track of entities references *
14549 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014550 * *
14551 ************************************************************************/
14552
14553static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14554
14555/**
14556 * xmlAddEntityReference:
14557 * @ent : A valid entity
14558 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014559 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014560 *
14561 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14562 */
14563static void
14564xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14565 xmlNodePtr lastNode)
14566{
14567 if (xmlEntityRefFunc != NULL) {
14568 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14569 }
14570}
14571
14572
14573/**
14574 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014575 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014576 *
14577 * Set the function to call call back when a xml reference has been made
14578 */
14579void
14580xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14581{
14582 xmlEntityRefFunc = func;
14583}
Daniel Veillard81273902003-09-30 00:43:48 +000014584#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014585
14586/************************************************************************
14587 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014588 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014589 * *
14590 ************************************************************************/
14591
14592#ifdef LIBXML_XPATH_ENABLED
14593#include <libxml/xpath.h>
14594#endif
14595
Daniel Veillardffa3c742005-07-21 13:24:09 +000014596extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014597static int xmlParserInitialized = 0;
14598
14599/**
14600 * xmlInitParser:
14601 *
14602 * Initialization function for the XML parser.
14603 * This is not reentrant. Call once before processing in case of
14604 * use in multithreaded programs.
14605 */
14606
14607void
14608xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014609 if (xmlParserInitialized != 0)
14610 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014611
Haibo Huangcfd91dc2020-07-30 23:01:33 -070014612#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14613 atexit(xmlCleanupParser);
14614#endif
14615
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014616#ifdef LIBXML_THREAD_ENABLED
14617 __xmlGlobalInitMutexLock();
14618 if (xmlParserInitialized == 0) {
14619#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014620 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014621 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014622 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14623 (xmlGenericError == NULL))
14624 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014625 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014626 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014627 xmlInitCharEncodingHandlers();
14628 xmlDefaultSAXHandlerInit();
14629 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014630#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014631 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014632#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014633#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014634 htmlInitAutoClose();
14635 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014636#endif
14637#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014638 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014639#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014640 xmlParserInitialized = 1;
14641#ifdef LIBXML_THREAD_ENABLED
14642 }
14643 __xmlGlobalInitMutexUnlock();
14644#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014645}
14646
14647/**
14648 * xmlCleanupParser:
14649 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014650 * This function name is somewhat misleading. It does not clean up
14651 * parser state, it cleans up memory allocated by the library itself.
14652 * It is a cleanup function for the XML library. It tries to reclaim all
14653 * related global memory allocated for the library processing.
14654 * It doesn't deallocate any document related memory. One should
14655 * call xmlCleanupParser() only when the process has finished using
14656 * the library and all XML/HTML documents built with it.
14657 * See also xmlInitParser() which has the opposite function of preparing
14658 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014659 *
14660 * WARNING: if your application is multithreaded or has plugin support
14661 * calling this may crash the application if another thread or
14662 * a plugin is still using libxml2. It's sometimes very hard to
14663 * guess if libxml2 is in use in the application, some libraries
14664 * or plugins may use it without notice. In case of doubt abstain
14665 * from calling this function or do it just before calling exit()
14666 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014667 */
14668
14669void
14670xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014671 if (!xmlParserInitialized)
14672 return;
14673
Owen Taylor3473f882001-02-23 17:55:21 +000014674 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014675#ifdef LIBXML_CATALOG_ENABLED
14676 xmlCatalogCleanup();
14677#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014678 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014679 xmlCleanupInputCallbacks();
14680#ifdef LIBXML_OUTPUT_ENABLED
14681 xmlCleanupOutputCallbacks();
14682#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014683#ifdef LIBXML_SCHEMAS_ENABLED
14684 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014685 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014686#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014687 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014688 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014689 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014690 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014691 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014692}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014693
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function tagged with ATTRIBUTE_DESTRUCTOR that runs the library cleanup.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    xmlCleanupParser();
}
#endif
14702
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014703/************************************************************************
14704 * *
14705 * New set (2.6.0) of simpler and more flexible APIs *
14706 * *
14707 ************************************************************************/
14708
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is expanded. NULL strings are ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and is safe inside unbraced if/else constructs; it
 * must be followed by a semicolon.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14720
14721/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014722 * xmlCtxtReset:
14723 * @ctxt: an XML parser context
14724 *
14725 * Reset a parser context
14726 */
14727void
14728xmlCtxtReset(xmlParserCtxtPtr ctxt)
14729{
14730 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014731 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014732
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014733 if (ctxt == NULL)
14734 return;
14735
14736 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014737
14738 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14739 xmlFreeInputStream(input);
14740 }
14741 ctxt->inputNr = 0;
14742 ctxt->input = NULL;
14743
14744 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014745 if (ctxt->spaceTab != NULL) {
14746 ctxt->spaceTab[0] = -1;
14747 ctxt->space = &ctxt->spaceTab[0];
14748 } else {
14749 ctxt->space = NULL;
14750 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014751
14752
14753 ctxt->nodeNr = 0;
14754 ctxt->node = NULL;
14755
14756 ctxt->nameNr = 0;
14757 ctxt->name = NULL;
14758
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014759 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014760 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014761 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014762 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014763 DICT_FREE(ctxt->directory);
14764 ctxt->directory = NULL;
14765 DICT_FREE(ctxt->extSubURI);
14766 ctxt->extSubURI = NULL;
14767 DICT_FREE(ctxt->extSubSystem);
14768 ctxt->extSubSystem = NULL;
14769 if (ctxt->myDoc != NULL)
14770 xmlFreeDoc(ctxt->myDoc);
14771 ctxt->myDoc = NULL;
14772
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014773 ctxt->standalone = -1;
14774 ctxt->hasExternalSubset = 0;
14775 ctxt->hasPErefs = 0;
14776 ctxt->html = 0;
14777 ctxt->external = 0;
14778 ctxt->instate = XML_PARSER_START;
14779 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014780
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014781 ctxt->wellFormed = 1;
14782 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014783 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014784 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014785#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014786 ctxt->vctxt.userData = ctxt;
14787 ctxt->vctxt.error = xmlParserValidityError;
14788 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014789#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014790 ctxt->record_info = 0;
14791 ctxt->nbChars = 0;
14792 ctxt->checkIndex = 0;
14793 ctxt->inSubset = 0;
14794 ctxt->errNo = XML_ERR_OK;
14795 ctxt->depth = 0;
14796 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14797 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014798 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014799 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014800 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014801 xmlInitNodeInfoSeq(&ctxt->node_seq);
14802
14803 if (ctxt->attsDefault != NULL) {
Nick Wellnhofere03f0a12017-11-09 16:42:47 +010014804 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014805 ctxt->attsDefault = NULL;
14806 }
14807 if (ctxt->attsSpecial != NULL) {
14808 xmlHashFree(ctxt->attsSpecial, NULL);
14809 ctxt->attsSpecial = NULL;
14810 }
14811
Daniel Veillard4432df22003-09-28 18:58:27 +000014812#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014813 if (ctxt->catalogs != NULL)
14814 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014815#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014816 if (ctxt->lastError.code != XML_ERR_OK)
14817 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014818}
14819
14820/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014821 * xmlCtxtResetPush:
14822 * @ctxt: an XML parser context
14823 * @chunk: a pointer to an array of chars
14824 * @size: number of chars in the array
14825 * @filename: an optional file name or URI
14826 * @encoding: the document encoding, or NULL
14827 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014828 * Reset a push parser context
14829 *
14830 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014831 */
14832int
14833xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14834 int size, const char *filename, const char *encoding)
14835{
14836 xmlParserInputPtr inputStream;
14837 xmlParserInputBufferPtr buf;
14838 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14839
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014840 if (ctxt == NULL)
14841 return(1);
14842
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014843 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14844 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14845
14846 buf = xmlAllocParserInputBuffer(enc);
14847 if (buf == NULL)
14848 return(1);
14849
14850 if (ctxt == NULL) {
14851 xmlFreeParserInputBuffer(buf);
14852 return(1);
14853 }
14854
14855 xmlCtxtReset(ctxt);
14856
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014857 if (filename == NULL) {
14858 ctxt->directory = NULL;
14859 } else {
14860 ctxt->directory = xmlParserGetDirectory(filename);
14861 }
14862
14863 inputStream = xmlNewInputStream(ctxt);
14864 if (inputStream == NULL) {
14865 xmlFreeParserInputBuffer(buf);
14866 return(1);
14867 }
14868
14869 if (filename == NULL)
14870 inputStream->filename = NULL;
14871 else
14872 inputStream->filename = (char *)
14873 xmlCanonicPath((const xmlChar *) filename);
14874 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014875 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014876
14877 inputPush(ctxt, inputStream);
14878
14879 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14880 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014881 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14882 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014883
14884 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14885
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014886 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014887#ifdef DEBUG_PUSH
14888 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14889#endif
14890 }
14891
14892 if (encoding != NULL) {
14893 xmlCharEncodingHandlerPtr hdlr;
14894
Daniel Veillard37334572008-07-31 08:20:02 +000014895 if (ctxt->encoding != NULL)
14896 xmlFree((xmlChar *) ctxt->encoding);
14897 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14898
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014899 hdlr = xmlFindCharEncodingHandler(encoding);
14900 if (hdlr != NULL) {
14901 xmlSwitchToEncoding(ctxt, hdlr);
14902 } else {
14903 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14904 "Unsupported encoding %s\n", BAD_CAST encoding);
14905 }
14906 } else if (enc != XML_CHAR_ENCODING_NONE) {
14907 xmlSwitchEncoding(ctxt, enc);
14908 }
14909
14910 return(0);
14911}
14912
Daniel Veillard37334572008-07-31 08:20:02 +000014913
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014914/**
Daniel Veillard37334572008-07-31 08:20:02 +000014915 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014916 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014917 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014918 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014919 *
14920 * Applies the options to the parser context
14921 *
14922 * Returns 0 in case of success, the set of unknown or unimplemented options
14923 * in case of error.
14924 */
Daniel Veillard37334572008-07-31 08:20:02 +000014925static int
14926xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014927{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014928 if (ctxt == NULL)
14929 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014930 if (encoding != NULL) {
14931 if (ctxt->encoding != NULL)
14932 xmlFree((xmlChar *) ctxt->encoding);
14933 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14934 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014935 if (options & XML_PARSE_RECOVER) {
14936 ctxt->recovery = 1;
14937 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014938 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014939 } else
14940 ctxt->recovery = 0;
14941 if (options & XML_PARSE_DTDLOAD) {
14942 ctxt->loadsubset = XML_DETECT_IDS;
14943 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014944 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014945 } else
14946 ctxt->loadsubset = 0;
14947 if (options & XML_PARSE_DTDATTR) {
14948 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14949 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014950 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014951 }
14952 if (options & XML_PARSE_NOENT) {
14953 ctxt->replaceEntities = 1;
14954 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14955 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014956 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014957 } else
14958 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014959 if (options & XML_PARSE_PEDANTIC) {
14960 ctxt->pedantic = 1;
14961 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014962 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014963 } else
14964 ctxt->pedantic = 0;
14965 if (options & XML_PARSE_NOBLANKS) {
14966 ctxt->keepBlanks = 0;
14967 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14968 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014969 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014970 } else
14971 ctxt->keepBlanks = 1;
14972 if (options & XML_PARSE_DTDVALID) {
14973 ctxt->validate = 1;
14974 if (options & XML_PARSE_NOWARNING)
14975 ctxt->vctxt.warning = NULL;
14976 if (options & XML_PARSE_NOERROR)
14977 ctxt->vctxt.error = NULL;
14978 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014979 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014980 } else
14981 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014982 if (options & XML_PARSE_NOWARNING) {
14983 ctxt->sax->warning = NULL;
14984 options -= XML_PARSE_NOWARNING;
14985 }
14986 if (options & XML_PARSE_NOERROR) {
14987 ctxt->sax->error = NULL;
14988 ctxt->sax->fatalError = NULL;
14989 options -= XML_PARSE_NOERROR;
14990 }
Daniel Veillard81273902003-09-30 00:43:48 +000014991#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014992 if (options & XML_PARSE_SAX1) {
14993 ctxt->sax->startElement = xmlSAX2StartElement;
14994 ctxt->sax->endElement = xmlSAX2EndElement;
14995 ctxt->sax->startElementNs = NULL;
14996 ctxt->sax->endElementNs = NULL;
14997 ctxt->sax->initialized = 1;
14998 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014999 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015000 }
Daniel Veillard81273902003-09-30 00:43:48 +000015001#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015002 if (options & XML_PARSE_NODICT) {
15003 ctxt->dictNames = 0;
15004 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015005 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015006 } else {
15007 ctxt->dictNames = 1;
15008 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015009 if (options & XML_PARSE_NOCDATA) {
15010 ctxt->sax->cdataBlock = NULL;
15011 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015012 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015013 }
15014 if (options & XML_PARSE_NSCLEAN) {
15015 ctxt->options |= XML_PARSE_NSCLEAN;
15016 options -= XML_PARSE_NSCLEAN;
15017 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015018 if (options & XML_PARSE_NONET) {
15019 ctxt->options |= XML_PARSE_NONET;
15020 options -= XML_PARSE_NONET;
15021 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015022 if (options & XML_PARSE_COMPACT) {
15023 ctxt->options |= XML_PARSE_COMPACT;
15024 options -= XML_PARSE_COMPACT;
15025 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015026 if (options & XML_PARSE_OLD10) {
15027 ctxt->options |= XML_PARSE_OLD10;
15028 options -= XML_PARSE_OLD10;
15029 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015030 if (options & XML_PARSE_NOBASEFIX) {
15031 ctxt->options |= XML_PARSE_NOBASEFIX;
15032 options -= XML_PARSE_NOBASEFIX;
15033 }
15034 if (options & XML_PARSE_HUGE) {
15035 ctxt->options |= XML_PARSE_HUGE;
15036 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015037 if (ctxt->dict != NULL)
15038 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015039 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015040 if (options & XML_PARSE_OLDSAX) {
15041 ctxt->options |= XML_PARSE_OLDSAX;
15042 options -= XML_PARSE_OLDSAX;
15043 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015044 if (options & XML_PARSE_IGNORE_ENC) {
15045 ctxt->options |= XML_PARSE_IGNORE_ENC;
15046 options -= XML_PARSE_IGNORE_ENC;
15047 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015048 if (options & XML_PARSE_BIG_LINES) {
15049 ctxt->options |= XML_PARSE_BIG_LINES;
15050 options -= XML_PARSE_BIG_LINES;
15051 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015052 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015053 return (options);
15054}
15055
15056/**
Daniel Veillard37334572008-07-31 08:20:02 +000015057 * xmlCtxtUseOptions:
15058 * @ctxt: an XML parser context
15059 * @options: a combination of xmlParserOption
15060 *
15061 * Applies the options to the parser context
15062 *
15063 * Returns 0 in case of success, the set of unknown or unimplemented options
15064 * in case of error.
15065 */
15066int
15067xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15068{
15069 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15070}
15071
15072/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015073 * xmlDoRead:
15074 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015075 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015076 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015077 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015078 * @reuse: keep the context for reuse
15079 *
15080 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015081 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015082 * Returns the resulting document tree or NULL
15083 */
15084static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015085xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15086 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015087{
15088 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015089
15090 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015091 if (encoding != NULL) {
15092 xmlCharEncodingHandlerPtr hdlr;
15093
15094 hdlr = xmlFindCharEncodingHandler(encoding);
15095 if (hdlr != NULL)
15096 xmlSwitchToEncoding(ctxt, hdlr);
15097 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015098 if ((URL != NULL) && (ctxt->input != NULL) &&
15099 (ctxt->input->filename == NULL))
15100 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015101 xmlParseDocument(ctxt);
15102 if ((ctxt->wellFormed) || ctxt->recovery)
15103 ret = ctxt->myDoc;
15104 else {
15105 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015106 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015107 xmlFreeDoc(ctxt->myDoc);
15108 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015109 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015110 ctxt->myDoc = NULL;
15111 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015112 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015113 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015114
15115 return (ret);
15116}
15117
15118/**
15119 * xmlReadDoc:
15120 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015121 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015122 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015123 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015124 *
15125 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015126 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015127 * Returns the resulting document tree
15128 */
15129xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015130xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015131{
15132 xmlParserCtxtPtr ctxt;
15133
15134 if (cur == NULL)
15135 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015136 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015137
15138 ctxt = xmlCreateDocParserCtxt(cur);
15139 if (ctxt == NULL)
15140 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015141 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015142}
15143
15144/**
15145 * xmlReadFile:
15146 * @filename: a file or URL
15147 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015148 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015149 *
15150 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015151 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015152 * Returns the resulting document tree
15153 */
15154xmlDocPtr
15155xmlReadFile(const char *filename, const char *encoding, int options)
15156{
15157 xmlParserCtxtPtr ctxt;
15158
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015159 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015160 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015161 if (ctxt == NULL)
15162 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015163 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015164}
15165
15166/**
15167 * xmlReadMemory:
15168 * @buffer: a pointer to a char array
15169 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015170 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015171 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015172 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015173 *
15174 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015175 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015176 * Returns the resulting document tree
15177 */
15178xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015179xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015180{
15181 xmlParserCtxtPtr ctxt;
15182
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015183 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015184 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15185 if (ctxt == NULL)
15186 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015187 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015188}
15189
15190/**
15191 * xmlReadFd:
15192 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015193 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015194 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015195 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015196 *
15197 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015198 * NOTE that the file descriptor will not be closed when the
15199 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015200 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015201 * Returns the resulting document tree
15202 */
15203xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015204xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015205{
15206 xmlParserCtxtPtr ctxt;
15207 xmlParserInputBufferPtr input;
15208 xmlParserInputPtr stream;
15209
15210 if (fd < 0)
15211 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015212 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015213
15214 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15215 if (input == NULL)
15216 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015217 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015218 ctxt = xmlNewParserCtxt();
15219 if (ctxt == NULL) {
15220 xmlFreeParserInputBuffer(input);
15221 return (NULL);
15222 }
15223 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15224 if (stream == NULL) {
15225 xmlFreeParserInputBuffer(input);
15226 xmlFreeParserCtxt(ctxt);
15227 return (NULL);
15228 }
15229 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015230 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015231}
15232
15233/**
15234 * xmlReadIO:
15235 * @ioread: an I/O read function
15236 * @ioclose: an I/O close function
15237 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015238 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015239 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015240 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015241 *
15242 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015243 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015244 * Returns the resulting document tree
15245 */
15246xmlDocPtr
15247xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015248 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015249{
15250 xmlParserCtxtPtr ctxt;
15251 xmlParserInputBufferPtr input;
15252 xmlParserInputPtr stream;
15253
15254 if (ioread == NULL)
15255 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015256 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015257
15258 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15259 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015260 if (input == NULL) {
15261 if (ioclose != NULL)
15262 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015263 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015264 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015265 ctxt = xmlNewParserCtxt();
15266 if (ctxt == NULL) {
15267 xmlFreeParserInputBuffer(input);
15268 return (NULL);
15269 }
15270 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15271 if (stream == NULL) {
15272 xmlFreeParserInputBuffer(input);
15273 xmlFreeParserCtxt(ctxt);
15274 return (NULL);
15275 }
15276 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015277 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015278}
15279
15280/**
15281 * xmlCtxtReadDoc:
15282 * @ctxt: an XML parser context
15283 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015284 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015285 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015286 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015287 *
15288 * parse an XML in-memory document and build a tree.
15289 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015290 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015291 * Returns the resulting document tree
15292 */
15293xmlDocPtr
15294xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015295 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015296{
15297 xmlParserInputPtr stream;
15298
15299 if (cur == NULL)
15300 return (NULL);
15301 if (ctxt == NULL)
15302 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015303 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015304
15305 xmlCtxtReset(ctxt);
15306
15307 stream = xmlNewStringInputStream(ctxt, cur);
15308 if (stream == NULL) {
15309 return (NULL);
15310 }
15311 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015312 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015313}
15314
15315/**
15316 * xmlCtxtReadFile:
15317 * @ctxt: an XML parser context
15318 * @filename: a file or URL
15319 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015320 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015321 *
15322 * parse an XML file from the filesystem or the network.
15323 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015324 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015325 * Returns the resulting document tree
15326 */
15327xmlDocPtr
15328xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15329 const char *encoding, int options)
15330{
15331 xmlParserInputPtr stream;
15332
15333 if (filename == NULL)
15334 return (NULL);
15335 if (ctxt == NULL)
15336 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015337 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015338
15339 xmlCtxtReset(ctxt);
15340
Daniel Veillard29614c72004-11-26 10:47:26 +000015341 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015342 if (stream == NULL) {
15343 return (NULL);
15344 }
15345 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015346 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015347}
15348
15349/**
15350 * xmlCtxtReadMemory:
15351 * @ctxt: an XML parser context
15352 * @buffer: a pointer to a char array
15353 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015354 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015355 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015356 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015357 *
15358 * parse an XML in-memory document and build a tree.
15359 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015360 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015361 * Returns the resulting document tree
15362 */
15363xmlDocPtr
15364xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015365 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015366{
15367 xmlParserInputBufferPtr input;
15368 xmlParserInputPtr stream;
15369
15370 if (ctxt == NULL)
15371 return (NULL);
15372 if (buffer == NULL)
15373 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015374 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015375
15376 xmlCtxtReset(ctxt);
15377
15378 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15379 if (input == NULL) {
15380 return(NULL);
15381 }
15382
15383 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15384 if (stream == NULL) {
15385 xmlFreeParserInputBuffer(input);
15386 return(NULL);
15387 }
15388
15389 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015390 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015391}
15392
15393/**
15394 * xmlCtxtReadFd:
15395 * @ctxt: an XML parser context
15396 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015397 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015398 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015399 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015400 *
15401 * parse an XML from a file descriptor and build a tree.
15402 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015403 * NOTE that the file descriptor will not be closed when the
15404 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015405 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015406 * Returns the resulting document tree
15407 */
15408xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015409xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15410 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015411{
15412 xmlParserInputBufferPtr input;
15413 xmlParserInputPtr stream;
15414
15415 if (fd < 0)
15416 return (NULL);
15417 if (ctxt == NULL)
15418 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015419 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015420
15421 xmlCtxtReset(ctxt);
15422
15423
15424 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15425 if (input == NULL)
15426 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015427 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015428 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15429 if (stream == NULL) {
15430 xmlFreeParserInputBuffer(input);
15431 return (NULL);
15432 }
15433 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015434 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015435}
15436
15437/**
15438 * xmlCtxtReadIO:
15439 * @ctxt: an XML parser context
15440 * @ioread: an I/O read function
15441 * @ioclose: an I/O close function
15442 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015443 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015444 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015445 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015446 *
15447 * parse an XML document from I/O functions and source and build a tree.
15448 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015449 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015450 * Returns the resulting document tree
15451 */
15452xmlDocPtr
15453xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15454 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015455 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015456 const char *encoding, int options)
15457{
15458 xmlParserInputBufferPtr input;
15459 xmlParserInputPtr stream;
15460
15461 if (ioread == NULL)
15462 return (NULL);
15463 if (ctxt == NULL)
15464 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015465 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015466
15467 xmlCtxtReset(ctxt);
15468
15469 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15470 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015471 if (input == NULL) {
15472 if (ioclose != NULL)
15473 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015474 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015475 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015476 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15477 if (stream == NULL) {
15478 xmlFreeParserInputBuffer(input);
15479 return (NULL);
15480 }
15481 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015482 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015483}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015484
15485#define bottom_parser
15486#include "elfgcchack.h"