blob: ca9fde2c87588b2c2373cdaf26208d1a81796325 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
Daniel Veillardf8e3db02012-09-11 13:26:36 +080020 * different ranges of character are actually implanted either in
Owen Taylor3473f882001-02-23 17:55:21 +000021 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Stéphane Michaut454e3972017-08-28 14:30:43 +020033/* To avoid EBCDIC trouble when parsing on zOS */
34#if defined(__MVS__)
35#pragma convert("ISO8859-1")
36#endif
37
Daniel Veillard34ce8be2002-03-18 19:37:11 +000038#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000039#include "libxml.h"
40
Nick Wellnhofere3890542017-10-09 00:20:01 +020041#if defined(_WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000042#define XML_DIR_SEP '\\'
43#else
Owen Taylor3473f882001-02-23 17:55:21 +000044#define XML_DIR_SEP '/'
45#endif
46
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080048#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000050#include <stdarg.h>
Nick Wellnhofer855c19e2017-06-01 01:04:08 +020051#include <stddef.h>
Owen Taylor3473f882001-02-23 17:55:21 +000052#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000053#include <libxml/threads.h>
54#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055#include <libxml/tree.h>
56#include <libxml/parser.h>
57#include <libxml/parserInternals.h>
58#include <libxml/valid.h>
59#include <libxml/entities.h>
60#include <libxml/xmlerror.h>
61#include <libxml/encoding.h>
62#include <libxml/xmlIO.h>
63#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000064#ifdef LIBXML_CATALOG_ENABLED
65#include <libxml/catalog.h>
66#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000067#ifdef LIBXML_SCHEMAS_ENABLED
68#include <libxml/xmlschemastypes.h>
69#include <libxml/relaxng.h>
70#endif
Owen Taylor3473f882001-02-23 17:55:21 +000071#ifdef HAVE_CTYPE_H
72#include <ctype.h>
73#endif
74#ifdef HAVE_STDLIB_H
75#include <stdlib.h>
76#endif
77#ifdef HAVE_SYS_STAT_H
78#include <sys/stat.h>
79#endif
80#ifdef HAVE_FCNTL_H
81#include <fcntl.h>
82#endif
83#ifdef HAVE_UNISTD_H
84#include <unistd.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080097static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98
Daniel Veillard0161e632008-08-28 15:36:32 +000099/************************************************************************
100 * *
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
102 * *
103 ************************************************************************/
104
105#define XML_PARSER_BIG_ENTITY 1000
106#define XML_PARSER_LOT_ENTITY 5000
107
108/*
109 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
110 * replacement over the size in byte of the input indicates that you have
111 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
112 * replacements per byte of input.
113 */
114#define XML_PARSER_NON_LINEAR 10
115
116/*
117 * xmlParserEntityCheck
118 *
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential linear entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
123 * parser option.
124 */
125static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800126xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800127 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000128{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800129 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000130
131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132 return (0);
133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800135
136 /*
137 * This may look absurd but is needed to detect
138 * entities problems
139 */
140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillardbdd66182016-05-23 12:27:58 +0800141 (ent->content != NULL) && (ent->checked == 0) &&
142 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800143 unsigned long oldnbent = ctxt->nbentities;
144 xmlChar *rep;
145
146 ent->checked = 1;
147
Peter Simons8f30bdf2016-04-15 11:56:55 +0200148 ++ctxt->depth;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800149 rep = xmlStringDecodeEntities(ctxt, ent->content,
150 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +0200151 --ctxt->depth;
Nick Wellnhofer707ad082018-01-23 16:37:54 +0100152 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbdd66182016-05-23 12:27:58 +0800153 ent->content[0] = 0;
154 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800155
156 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
157 if (rep != NULL) {
158 if (xmlStrchr(rep, '<'))
159 ent->checked |= 1;
160 xmlFree(rep);
161 rep = NULL;
162 }
163 }
Daniel Veillard23f05e02013-02-19 10:21:49 +0800164 if (replacement != 0) {
165 if (replacement < XML_MAX_TEXT_LENGTH)
166 return(0);
167
168 /*
169 * If the volume of entity copy reaches 10 times the
170 * amount of parsed data and over the large text threshold
171 * then that's very likely to be an abuse.
172 */
173 if (ctxt->input != NULL) {
174 consumed = ctxt->input->consumed +
175 (ctxt->input->cur - ctxt->input->base);
176 }
177 consumed += ctxt->sizeentities;
178
179 if (replacement < XML_PARSER_NON_LINEAR * consumed)
180 return(0);
181 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000182 /*
183 * Do the check based on the replacement size of the entity
184 */
185 if (size < XML_PARSER_BIG_ENTITY)
186 return(0);
187
188 /*
189 * A limit on the amount of text data reasonably used
190 */
191 if (ctxt->input != NULL) {
192 consumed = ctxt->input->consumed +
193 (ctxt->input->cur - ctxt->input->base);
194 }
195 consumed += ctxt->sizeentities;
196
197 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
198 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
199 return (0);
200 } else if (ent != NULL) {
201 /*
202 * use the number of parsed entities in the replacement
203 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800204 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000205
206 /*
207 * The amount of data parsed counting entities size only once
208 */
209 if (ctxt->input != NULL) {
210 consumed = ctxt->input->consumed +
211 (ctxt->input->cur - ctxt->input->base);
212 }
213 consumed += ctxt->sizeentities;
214
215 /*
216 * Check the density of entities for the amount of data
217 * knowing an entity reference will take at least 3 bytes
218 */
219 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
220 return (0);
221 } else {
222 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800223 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000224 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800225 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
226 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
227 (ctxt->nbentities <= 10000))
228 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000229 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000230 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
231 return (1);
232}
233
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000234/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000235 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000236 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000237 * arbitrary depth limit for the XML documents that we allow to
238 * process. This is not a limitation of the parser but a safety
239 * boundary feature. It can be disabled with the XML_PARSE_HUGE
240 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000241 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000242unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000243
Daniel Veillard0fb18932003-09-07 09:14:37 +0000244
Daniel Veillard0161e632008-08-28 15:36:32 +0000245
246#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000247#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000248#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000249#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
250
Daniel Veillard1f972e92012-08-15 10:16:37 +0800251/**
252 * XML_PARSER_CHUNK_SIZE
253 *
254 * When calling GROW that's the minimal amount of data
255 * the parser expected to have received. It is not a hard
256 * limit but an optimization when reading strings like Names
257 * It is not strictly needed as long as inputs available characters
258 * are followed by 0, which should be provided by the I/O level
259 */
260#define XML_PARSER_CHUNK_SIZE 100
261
Owen Taylor3473f882001-02-23 17:55:21 +0000262/*
Owen Taylor3473f882001-02-23 17:55:21 +0000263 * List of XML prefixed PI allowed by W3C specs
264 */
265
Daniel Veillardb44025c2001-10-11 22:55:55 +0000266static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000267 "xml-stylesheet",
Daniel Veillard4c4653e2011-06-05 11:29:29 +0800268 "xml-model",
Owen Taylor3473f882001-02-23 17:55:21 +0000269 NULL
270};
271
Daniel Veillarda07050d2003-10-19 14:46:32 +0000272
Owen Taylor3473f882001-02-23 17:55:21 +0000273/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200274static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
275 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000276
Daniel Veillard7d515752003-09-26 19:12:37 +0000277static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000278xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
279 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000280 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000281 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000282
Daniel Veillard37334572008-07-31 08:20:02 +0000283static int
284xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
285 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000286#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000287static void
288xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
289 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000290#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000291
Daniel Veillard7d515752003-09-26 19:12:37 +0000292static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000293xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
294 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000295
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000296static int
297xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
298
Daniel Veillarde57ec792003-09-10 10:50:59 +0000299/************************************************************************
300 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800301 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 * *
303 ************************************************************************/
304
305/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 * xmlErrAttributeDup:
307 * @ctxt: an XML parser context
308 * @prefix: the attribute prefix
309 * @localname: the attribute localname
310 *
311 * Handle a redefinition of attribute error
312 */
313static void
314xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
315 const xmlChar * localname)
316{
Daniel Veillard157fee02003-10-31 10:36:03 +0000317 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
318 (ctxt->instate == XML_PARSER_EOF))
319 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000320 if (ctxt != NULL)
321 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200322
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000323 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000324 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200325 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000326 (const char *) localname, NULL, NULL, 0, 0,
327 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000328 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000329 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200330 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 (const char *) prefix, (const char *) localname,
332 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
333 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000334 if (ctxt != NULL) {
335 ctxt->wellFormed = 0;
336 if (ctxt->recovery == 0)
337 ctxt->disableSAX = 1;
338 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339}
340
341/**
342 * xmlFatalErr:
343 * @ctxt: an XML parser context
344 * @error: the error number
345 * @extra: extra information string
346 *
347 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
348 */
349static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000350xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351{
352 const char *errmsg;
353
Daniel Veillard157fee02003-10-31 10:36:03 +0000354 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
355 (ctxt->instate == XML_PARSER_EOF))
356 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 switch (error) {
358 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800359 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800362 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800365 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 errmsg = "internal error";
369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800374 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800377 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800404 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800428 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800431 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800434 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000437 errmsg = "Fragment not allowed";
438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800440 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800443 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800446 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800449 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000451 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800452 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800455 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000457 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800458 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000460 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800462 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000464 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800465 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000466 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000467 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800468 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000470 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800471 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 break;
473 case XML_ERR_CONDSEC_INVALID_KEYWORD:
474 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800475 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000476 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800478 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000480 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000492 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000495 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800499 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000500 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000501 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800502 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000504 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800505 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000506 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000507 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800508 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000509 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000510 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800511 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000512 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000513 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800514 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000515 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000516 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800517 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000518 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000519 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800520 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000521 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000522 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800523 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000525 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800526 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000527 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800528 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800529 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800530 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000531#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000532 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800533 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000534 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000535#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000536 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800537 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000538 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000539 if (ctxt != NULL)
540 ctxt->errNo = error;
David Kilzer4472c3a2016-05-13 15:13:17 +0800541 if (info == NULL) {
542 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
543 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
544 errmsg);
545 } else {
546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
547 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
548 errmsg, info);
549 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000555}
556
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000557/**
558 * xmlFatalErrMsg:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800565static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000566xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000568{
Daniel Veillard157fee02003-10-31 10:36:03 +0000569 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
570 (ctxt->instate == XML_PARSER_EOF))
571 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000572 if (ctxt != NULL)
573 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200575 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000576 if (ctxt != NULL) {
577 ctxt->wellFormed = 0;
578 if (ctxt->recovery == 0)
579 ctxt->disableSAX = 1;
580 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581}
582
583/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000584 * xmlWarningMsg:
585 * @ctxt: an XML parser context
586 * @error: the error number
587 * @msg: the error message
588 * @str1: extra data
589 * @str2: extra data
590 *
591 * Handle a warning.
592 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800593static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000594xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg, const xmlChar *str1, const xmlChar *str2)
596{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000597 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000598
Daniel Veillard157fee02003-10-31 10:36:03 +0000599 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
600 (ctxt->instate == XML_PARSER_EOF))
601 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000602 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
603 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000604 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200605 if (ctxt != NULL) {
606 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000607 (ctxt->sax) ? ctxt->sax->warning : NULL,
608 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000609 ctxt, NULL, XML_FROM_PARSER, error,
610 XML_ERR_WARNING, NULL, 0,
611 (const char *) str1, (const char *) str2, NULL, 0, 0,
612 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200613 } else {
614 __xmlRaiseError(schannel, NULL, NULL,
615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
619 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000620}
621
622/**
623 * xmlValidityError:
624 * @ctxt: an XML parser context
625 * @error: the error number
626 * @msg: the error message
627 * @str1: extra data
628 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000629 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000630 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800631static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000632xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000633 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000634{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000635 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000636
637 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638 (ctxt->instate == XML_PARSER_EOF))
639 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000640 if (ctxt != NULL) {
641 ctxt->errNo = error;
642 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
643 schannel = ctxt->sax->serror;
644 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200645 if (ctxt != NULL) {
646 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000647 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000648 ctxt, NULL, XML_FROM_DTD, error,
649 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000650 (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000652 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200653 } else {
654 __xmlRaiseError(schannel, NULL, NULL,
655 ctxt, NULL, XML_FROM_DTD, error,
656 XML_ERR_ERROR, NULL, 0, (const char *) str1,
657 (const char *) str2, NULL, 0, 0,
658 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000659 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000660}
661
662/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000663 * xmlFatalErrMsgInt:
664 * @ctxt: an XML parser context
665 * @error: the error number
666 * @msg: the error message
667 * @val: an integer value
668 *
669 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
670 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800671static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000672xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000673 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000674{
Daniel Veillard157fee02003-10-31 10:36:03 +0000675 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676 (ctxt->instate == XML_PARSER_EOF))
677 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000678 if (ctxt != NULL)
679 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000680 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000681 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
682 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000683 if (ctxt != NULL) {
684 ctxt->wellFormed = 0;
685 if (ctxt->recovery == 0)
686 ctxt->disableSAX = 1;
687 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000688}
689
690/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000691 * xmlFatalErrMsgStrIntStr:
692 * @ctxt: an XML parser context
693 * @error: the error number
694 * @msg: the error message
695 * @str1: an string info
696 * @val: an integer value
697 * @str2: an string info
698 *
699 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
700 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800701static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000702xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800703 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000704 const xmlChar *str2)
705{
Daniel Veillard157fee02003-10-31 10:36:03 +0000706 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
707 (ctxt->instate == XML_PARSER_EOF))
708 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000709 if (ctxt != NULL)
710 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000711 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000712 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
713 NULL, 0, (const char *) str1, (const char *) str2,
714 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000715 if (ctxt != NULL) {
716 ctxt->wellFormed = 0;
717 if (ctxt->recovery == 0)
718 ctxt->disableSAX = 1;
719 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000720}
721
722/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000723 * xmlFatalErrMsgStr:
724 * @ctxt: an XML parser context
725 * @error: the error number
726 * @msg: the error message
727 * @val: a string value
728 *
729 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
730 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800731static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000732xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000733 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000734{
Daniel Veillard157fee02003-10-31 10:36:03 +0000735 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736 (ctxt->instate == XML_PARSER_EOF))
737 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000738 if (ctxt != NULL)
739 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000740 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000741 XML_FROM_PARSER, error, XML_ERR_FATAL,
742 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
743 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000744 if (ctxt != NULL) {
745 ctxt->wellFormed = 0;
746 if (ctxt->recovery == 0)
747 ctxt->disableSAX = 1;
748 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000749}
750
751/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000752 * xmlErrMsgStr:
753 * @ctxt: an XML parser context
754 * @error: the error number
755 * @msg: the error message
756 * @val: a string value
757 *
758 * Handle a non fatal parser error
759 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800760static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000761xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762 const char *msg, const xmlChar * val)
763{
Daniel Veillard157fee02003-10-31 10:36:03 +0000764 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765 (ctxt->instate == XML_PARSER_EOF))
766 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000767 if (ctxt != NULL)
768 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000769 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000770 XML_FROM_PARSER, error, XML_ERR_ERROR,
771 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
772 val);
773}
774
775/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000776 * xmlNsErr:
777 * @ctxt: an XML parser context
778 * @error: the error number
779 * @msg: the message
780 * @info1: extra information string
781 * @info2: extra information string
782 *
783 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
784 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800785static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000786xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
787 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000788 const xmlChar * info1, const xmlChar * info2,
789 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000790{
Daniel Veillard157fee02003-10-31 10:36:03 +0000791 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
792 (ctxt->instate == XML_PARSER_EOF))
793 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000794 if (ctxt != NULL)
795 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000796 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000797 XML_ERR_ERROR, NULL, 0, (const char *) info1,
798 (const char *) info2, (const char *) info3, 0, 0, msg,
799 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000800 if (ctxt != NULL)
801 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000802}
803
Daniel Veillard37334572008-07-31 08:20:02 +0000804/**
805 * xmlNsWarn
806 * @ctxt: an XML parser context
807 * @error: the error number
808 * @msg: the message
809 * @info1: extra information string
810 * @info2: extra information string
811 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800812 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000813 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800814static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard37334572008-07-31 08:20:02 +0000815xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
816 const char *msg,
817 const xmlChar * info1, const xmlChar * info2,
818 const xmlChar * info3)
819{
820 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821 (ctxt->instate == XML_PARSER_EOF))
822 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000823 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
824 XML_ERR_WARNING, NULL, 0, (const char *) info1,
825 (const char *) info2, (const char *) info3, 0, 0, msg,
826 info1, info2, info3);
827}
828
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000829/************************************************************************
830 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800831 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000832 * *
833 ************************************************************************/
834
835/**
836 * xmlHasFeature:
837 * @feature: the feature to be examined
838 *
839 * Examines if the library has been compiled with a given feature.
840 *
841 * Returns a non-zero value if the feature exist, otherwise zero.
842 * Returns zero (0) if the feature does not exist or an unknown
843 * unknown feature is requested, non-zero otherwise.
844 */
845int
846xmlHasFeature(xmlFeature feature)
847{
848 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000849 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000850#ifdef LIBXML_THREAD_ENABLED
851 return(1);
852#else
853 return(0);
854#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000855 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000856#ifdef LIBXML_TREE_ENABLED
857 return(1);
858#else
859 return(0);
860#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000861 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000862#ifdef LIBXML_OUTPUT_ENABLED
863 return(1);
864#else
865 return(0);
866#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000867 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000868#ifdef LIBXML_PUSH_ENABLED
869 return(1);
870#else
871 return(0);
872#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000873 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000874#ifdef LIBXML_READER_ENABLED
875 return(1);
876#else
877 return(0);
878#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000879 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000880#ifdef LIBXML_PATTERN_ENABLED
881 return(1);
882#else
883 return(0);
884#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000885 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000886#ifdef LIBXML_WRITER_ENABLED
887 return(1);
888#else
889 return(0);
890#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000891 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000892#ifdef LIBXML_SAX1_ENABLED
893 return(1);
894#else
895 return(0);
896#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000897 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000898#ifdef LIBXML_FTP_ENABLED
899 return(1);
900#else
901 return(0);
902#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000903 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000904#ifdef LIBXML_HTTP_ENABLED
905 return(1);
906#else
907 return(0);
908#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000909 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000910#ifdef LIBXML_VALID_ENABLED
911 return(1);
912#else
913 return(0);
914#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000915 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000916#ifdef LIBXML_HTML_ENABLED
917 return(1);
918#else
919 return(0);
920#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000921 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000922#ifdef LIBXML_LEGACY_ENABLED
923 return(1);
924#else
925 return(0);
926#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000927 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000928#ifdef LIBXML_C14N_ENABLED
929 return(1);
930#else
931 return(0);
932#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000933 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000934#ifdef LIBXML_CATALOG_ENABLED
935 return(1);
936#else
937 return(0);
938#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000939 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000940#ifdef LIBXML_XPATH_ENABLED
941 return(1);
942#else
943 return(0);
944#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000945 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000946#ifdef LIBXML_XPTR_ENABLED
947 return(1);
948#else
949 return(0);
950#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000951 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000952#ifdef LIBXML_XINCLUDE_ENABLED
953 return(1);
954#else
955 return(0);
956#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000957 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000958#ifdef LIBXML_ICONV_ENABLED
959 return(1);
960#else
961 return(0);
962#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000963 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000964#ifdef LIBXML_ISO8859X_ENABLED
965 return(1);
966#else
967 return(0);
968#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000969 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000970#ifdef LIBXML_UNICODE_ENABLED
971 return(1);
972#else
973 return(0);
974#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000975 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000976#ifdef LIBXML_REGEXP_ENABLED
977 return(1);
978#else
979 return(0);
980#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000981 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000982#ifdef LIBXML_AUTOMATA_ENABLED
983 return(1);
984#else
985 return(0);
986#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000987 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000988#ifdef LIBXML_EXPR_ENABLED
989 return(1);
990#else
991 return(0);
992#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000993 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000994#ifdef LIBXML_SCHEMAS_ENABLED
995 return(1);
996#else
997 return(0);
998#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000999 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001000#ifdef LIBXML_SCHEMATRON_ENABLED
1001 return(1);
1002#else
1003 return(0);
1004#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001005 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001006#ifdef LIBXML_MODULES_ENABLED
1007 return(1);
1008#else
1009 return(0);
1010#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001011 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001012#ifdef LIBXML_DEBUG_ENABLED
1013 return(1);
1014#else
1015 return(0);
1016#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001017 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001018#ifdef DEBUG_MEMORY_LOCATION
1019 return(1);
1020#else
1021 return(0);
1022#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001023 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001024#ifdef LIBXML_DEBUG_RUNTIME
1025 return(1);
1026#else
1027 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001028#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001029 case XML_WITH_ZLIB:
1030#ifdef LIBXML_ZLIB_ENABLED
1031 return(1);
1032#else
1033 return(0);
1034#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001035 case XML_WITH_LZMA:
1036#ifdef LIBXML_LZMA_ENABLED
1037 return(1);
1038#else
1039 return(0);
1040#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001041 case XML_WITH_ICU:
1042#ifdef LIBXML_ICU_ENABLED
1043 return(1);
1044#else
1045 return(0);
1046#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001047 default:
1048 break;
1049 }
1050 return(0);
1051}
1052
1053/************************************************************************
1054 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001055 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056 * *
1057 ************************************************************************/
1058
1059/**
1060 * xmlDetectSAX2:
1061 * @ctxt: an XML parser context
1062 *
1063 * Do the SAX2 detection and specific intialization
1064 */
1065static void
1066xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1067 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001068#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001069 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1070 ((ctxt->sax->startElementNs != NULL) ||
1071 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001072#else
1073 ctxt->sax2 = 1;
1074#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001075
1076 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1077 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1078 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001079 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1080 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001081 xmlErrMemory(ctxt, NULL);
1082 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001083}
1084
Daniel Veillarde57ec792003-09-10 10:50:59 +00001085typedef struct _xmlDefAttrs xmlDefAttrs;
1086typedef xmlDefAttrs *xmlDefAttrsPtr;
1087struct _xmlDefAttrs {
1088 int nbAttrs; /* number of defaulted attributes on that element */
1089 int maxAttrs; /* the size of the array */
Nick Wellnhofer47496722017-05-31 16:46:39 +02001090#if __STDC_VERSION__ >= 199901L
1091 /* Using a C99 flexible array member avoids UBSan errors. */
1092 const xmlChar *values[]; /* array of localname/prefix/values/external */
1093#else
1094 const xmlChar *values[5];
1095#endif
Daniel Veillarde57ec792003-09-10 10:50:59 +00001096};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001097
1098/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001099 * xmlAttrNormalizeSpace:
1100 * @src: the source string
1101 * @dst: the target string
1102 *
1103 * Normalize the space in non CDATA attribute values:
1104 * If the attribute type is not CDATA, then the XML processor MUST further
1105 * process the normalized attribute value by discarding any leading and
1106 * trailing space (#x20) characters, and by replacing sequences of space
1107 * (#x20) characters by a single space (#x20) character.
1108 * Note that the size of dst need to be at least src, and if one doesn't need
1109 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1110 * passing src as dst is just fine.
1111 *
1112 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1113 * is needed.
1114 */
1115static xmlChar *
1116xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1117{
1118 if ((src == NULL) || (dst == NULL))
1119 return(NULL);
1120
1121 while (*src == 0x20) src++;
1122 while (*src != 0) {
1123 if (*src == 0x20) {
1124 while (*src == 0x20) src++;
1125 if (*src != 0)
1126 *dst++ = 0x20;
1127 } else {
1128 *dst++ = *src++;
1129 }
1130 }
1131 *dst = 0;
1132 if (dst == src)
1133 return(NULL);
1134 return(dst);
1135}
1136
1137/**
1138 * xmlAttrNormalizeSpace2:
1139 * @src: the source string
1140 *
1141 * Normalize the space in non CDATA attribute values, a slightly more complex
1142 * front end to avoid allocation problems when running on attribute values
1143 * coming from the input.
1144 *
1145 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1146 * is needed.
1147 */
1148static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001149xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001150{
1151 int i;
1152 int remove_head = 0;
1153 int need_realloc = 0;
1154 const xmlChar *cur;
1155
1156 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1157 return(NULL);
1158 i = *len;
1159 if (i <= 0)
1160 return(NULL);
1161
1162 cur = src;
1163 while (*cur == 0x20) {
1164 cur++;
1165 remove_head++;
1166 }
1167 while (*cur != 0) {
1168 if (*cur == 0x20) {
1169 cur++;
1170 if ((*cur == 0x20) || (*cur == 0)) {
1171 need_realloc = 1;
1172 break;
1173 }
1174 } else
1175 cur++;
1176 }
1177 if (need_realloc) {
1178 xmlChar *ret;
1179
1180 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1181 if (ret == NULL) {
1182 xmlErrMemory(ctxt, NULL);
1183 return(NULL);
1184 }
1185 xmlAttrNormalizeSpace(ret, ret);
1186 *len = (int) strlen((const char *)ret);
1187 return(ret);
1188 } else if (remove_head) {
1189 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001190 memmove(src, src + remove_head, 1 + *len);
1191 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001192 }
1193 return(NULL);
1194}
1195
1196/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001197 * xmlAddDefAttrs:
1198 * @ctxt: an XML parser context
1199 * @fullname: the element fullname
1200 * @fullattr: the attribute fullname
1201 * @value: the attribute value
1202 *
1203 * Add a defaulted attribute for an element
1204 */
1205static void
1206xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1207 const xmlChar *fullname,
1208 const xmlChar *fullattr,
1209 const xmlChar *value) {
1210 xmlDefAttrsPtr defaults;
1211 int len;
1212 const xmlChar *name;
1213 const xmlChar *prefix;
1214
Daniel Veillard6a31b832008-03-26 14:06:44 +00001215 /*
1216 * Allows to detect attribute redefinitions
1217 */
1218 if (ctxt->attsSpecial != NULL) {
1219 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1220 return;
1221 }
1222
Daniel Veillarde57ec792003-09-10 10:50:59 +00001223 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001224 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001225 if (ctxt->attsDefault == NULL)
1226 goto mem_error;
1227 }
1228
1229 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001230 * split the element name into prefix:localname , the string found
1231 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001232 */
1233 name = xmlSplitQName3(fullname, &len);
1234 if (name == NULL) {
1235 name = xmlDictLookup(ctxt->dict, fullname, -1);
1236 prefix = NULL;
1237 } else {
1238 name = xmlDictLookup(ctxt->dict, name, -1);
1239 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1240 }
1241
1242 /*
1243 * make sure there is some storage
1244 */
1245 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1246 if (defaults == NULL) {
1247 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001248 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001249 if (defaults == NULL)
1250 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001251 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001252 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001253 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1254 defaults, NULL) < 0) {
1255 xmlFree(defaults);
1256 goto mem_error;
1257 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001258 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001259 xmlDefAttrsPtr temp;
1260
1261 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001262 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001263 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001264 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001265 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001266 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001267 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1268 defaults, NULL) < 0) {
1269 xmlFree(defaults);
1270 goto mem_error;
1271 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001272 }
1273
1274 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001275 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001276 * are within the DTD and hen not associated to namespace names.
1277 */
1278 name = xmlSplitQName3(fullattr, &len);
1279 if (name == NULL) {
1280 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1281 prefix = NULL;
1282 } else {
1283 name = xmlDictLookup(ctxt->dict, name, -1);
1284 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1285 }
1286
Daniel Veillardae0765b2008-07-31 19:54:59 +00001287 defaults->values[5 * defaults->nbAttrs] = name;
1288 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001289 /* intern the string and precompute the end */
1290 len = xmlStrlen(value);
1291 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001292 defaults->values[5 * defaults->nbAttrs + 2] = value;
1293 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1294 if (ctxt->external)
1295 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1296 else
1297 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001298 defaults->nbAttrs++;
1299
1300 return;
1301
1302mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001303 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001304 return;
1305}
1306
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001307/**
1308 * xmlAddSpecialAttr:
1309 * @ctxt: an XML parser context
1310 * @fullname: the element fullname
1311 * @fullattr: the attribute fullname
1312 * @type: the attribute type
1313 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001314 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001315 */
1316static void
1317xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1318 const xmlChar *fullname,
1319 const xmlChar *fullattr,
1320 int type)
1321{
1322 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001323 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001324 if (ctxt->attsSpecial == NULL)
1325 goto mem_error;
1326 }
1327
Daniel Veillardac4118d2008-01-11 05:27:32 +00001328 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1329 return;
1330
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001331 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
Nick Wellnhoferd422b952017-10-09 13:37:42 +02001332 (void *) (ptrdiff_t) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001333 return;
1334
1335mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001336 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001337 return;
1338}
1339
Daniel Veillard4432df22003-09-28 18:58:27 +00001340/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001341 * xmlCleanSpecialAttrCallback:
1342 *
1343 * Removes CDATA attributes from the special attribute table
1344 */
1345static void
1346xmlCleanSpecialAttrCallback(void *payload, void *data,
1347 const xmlChar *fullname, const xmlChar *fullattr,
1348 const xmlChar *unused ATTRIBUTE_UNUSED) {
1349 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1350
Nick Wellnhoferd422b952017-10-09 13:37:42 +02001351 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001352 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1353 }
1354}
1355
1356/**
1357 * xmlCleanSpecialAttr:
1358 * @ctxt: an XML parser context
1359 *
1360 * Trim the list of attributes defined to remove all those of type
1361 * CDATA as they are not special. This call should be done when finishing
1362 * to parse the DTD and before starting to parse the document root.
1363 */
1364static void
1365xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1366{
1367 if (ctxt->attsSpecial == NULL)
1368 return;
1369
1370 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1371
1372 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1373 xmlHashFree(ctxt->attsSpecial, NULL);
1374 ctxt->attsSpecial = NULL;
1375 }
1376 return;
1377}
1378
1379/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001380 * xmlCheckLanguageID:
1381 * @lang: pointer to the string value
1382 *
1383 * Checks that the value conforms to the LanguageID production:
1384 *
1385 * NOTE: this is somewhat deprecated, those productions were removed from
1386 * the XML Second edition.
1387 *
1388 * [33] LanguageID ::= Langcode ('-' Subcode)*
1389 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1390 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1391 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1392 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1393 * [38] Subcode ::= ([a-z] | [A-Z])+
1394 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001395 * The current REC reference the sucessors of RFC 1766, currently 5646
1396 *
1397 * http://www.rfc-editor.org/rfc/rfc5646.txt
1398 * langtag = language
1399 * ["-" script]
1400 * ["-" region]
1401 * *("-" variant)
1402 * *("-" extension)
1403 * ["-" privateuse]
1404 * language = 2*3ALPHA ; shortest ISO 639 code
1405 * ["-" extlang] ; sometimes followed by
1406 * ; extended language subtags
1407 * / 4ALPHA ; or reserved for future use
1408 * / 5*8ALPHA ; or registered language subtag
1409 *
1410 * extlang = 3ALPHA ; selected ISO 639 codes
1411 * *2("-" 3ALPHA) ; permanently reserved
1412 *
1413 * script = 4ALPHA ; ISO 15924 code
1414 *
1415 * region = 2ALPHA ; ISO 3166-1 code
1416 * / 3DIGIT ; UN M.49 code
1417 *
1418 * variant = 5*8alphanum ; registered variants
1419 * / (DIGIT 3alphanum)
1420 *
1421 * extension = singleton 1*("-" (2*8alphanum))
1422 *
1423 * ; Single alphanumerics
1424 * ; "x" reserved for private use
1425 * singleton = DIGIT ; 0 - 9
1426 * / %x41-57 ; A - W
1427 * / %x59-5A ; Y - Z
1428 * / %x61-77 ; a - w
1429 * / %x79-7A ; y - z
1430 *
1431 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1432 * The parser below doesn't try to cope with extension or privateuse
1433 * that could be added but that's not interoperable anyway
1434 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001435 * Returns 1 if correct 0 otherwise
1436 **/
1437int
1438xmlCheckLanguageID(const xmlChar * lang)
1439{
Daniel Veillard60587d62010-11-04 15:16:27 +01001440 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001441
1442 if (cur == NULL)
1443 return (0);
1444 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001445 ((cur[0] == 'I') && (cur[1] == '-')) ||
1446 ((cur[0] == 'x') && (cur[1] == '-')) ||
1447 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001448 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001449 * Still allow IANA code and user code which were coming
1450 * from the previous version of the XML-1.0 specification
1451 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001452 */
1453 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001454 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001455 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1456 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001457 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001458 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001459 nxt = cur;
1460 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1461 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1462 nxt++;
1463 if (nxt - cur >= 4) {
1464 /*
1465 * Reserved
1466 */
1467 if ((nxt - cur > 8) || (nxt[0] != 0))
1468 return(0);
1469 return(1);
1470 }
1471 if (nxt - cur < 2)
1472 return(0);
1473 /* we got an ISO 639 code */
1474 if (nxt[0] == 0)
1475 return(1);
1476 if (nxt[0] != '-')
1477 return(0);
1478
1479 nxt++;
1480 cur = nxt;
1481 /* now we can have extlang or script or region or variant */
1482 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1483 goto region_m49;
1484
1485 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1486 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1487 nxt++;
1488 if (nxt - cur == 4)
1489 goto script;
1490 if (nxt - cur == 2)
1491 goto region;
1492 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1493 goto variant;
1494 if (nxt - cur != 3)
1495 return(0);
1496 /* we parsed an extlang */
1497 if (nxt[0] == 0)
1498 return(1);
1499 if (nxt[0] != '-')
1500 return(0);
1501
1502 nxt++;
1503 cur = nxt;
1504 /* now we can have script or region or variant */
1505 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1506 goto region_m49;
1507
1508 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510 nxt++;
1511 if (nxt - cur == 2)
1512 goto region;
1513 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1514 goto variant;
1515 if (nxt - cur != 4)
1516 return(0);
1517 /* we parsed a script */
1518script:
1519 if (nxt[0] == 0)
1520 return(1);
1521 if (nxt[0] != '-')
1522 return(0);
1523
1524 nxt++;
1525 cur = nxt;
1526 /* now we can have region or variant */
1527 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1528 goto region_m49;
1529
1530 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1531 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1532 nxt++;
1533
1534 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1535 goto variant;
1536 if (nxt - cur != 2)
1537 return(0);
1538 /* we parsed a region */
1539region:
1540 if (nxt[0] == 0)
1541 return(1);
1542 if (nxt[0] != '-')
1543 return(0);
1544
1545 nxt++;
1546 cur = nxt;
1547 /* now we can just have a variant */
1548 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1549 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1550 nxt++;
1551
1552 if ((nxt - cur < 5) || (nxt - cur > 8))
1553 return(0);
1554
1555 /* we parsed a variant */
1556variant:
1557 if (nxt[0] == 0)
1558 return(1);
1559 if (nxt[0] != '-')
1560 return(0);
1561 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001562 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001563
1564region_m49:
1565 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1566 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1567 nxt += 3;
1568 goto region;
1569 }
1570 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001571}
1572
Owen Taylor3473f882001-02-23 17:55:21 +00001573/************************************************************************
1574 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001575 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001576 * *
1577 ************************************************************************/
1578
Daniel Veillard8ed10722009-08-20 19:17:36 +02001579static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1580 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001581
Daniel Veillard0fb18932003-09-07 09:14:37 +00001582#ifdef SAX2
1583/**
1584 * nsPush:
1585 * @ctxt: an XML parser context
1586 * @prefix: the namespace prefix or NULL
1587 * @URL: the namespace name
1588 *
1589 * Pushes a new parser namespace on top of the ns stack
1590 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001591 * Returns -1 in case of error, -2 if the namespace should be discarded
1592 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001593 */
1594static int
1595nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1596{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001597 if (ctxt->options & XML_PARSE_NSCLEAN) {
1598 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001599 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001600 if (ctxt->nsTab[i] == prefix) {
1601 /* in scope */
1602 if (ctxt->nsTab[i + 1] == URL)
1603 return(-2);
1604 /* out of scope keep it */
1605 break;
1606 }
1607 }
1608 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001609 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1610 ctxt->nsMax = 10;
1611 ctxt->nsNr = 0;
1612 ctxt->nsTab = (const xmlChar **)
1613 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1614 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001615 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001616 ctxt->nsMax = 0;
1617 return (-1);
1618 }
1619 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001620 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001621 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001622 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1623 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1624 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001625 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001626 ctxt->nsMax /= 2;
1627 return (-1);
1628 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001629 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001630 }
1631 ctxt->nsTab[ctxt->nsNr++] = prefix;
1632 ctxt->nsTab[ctxt->nsNr++] = URL;
1633 return (ctxt->nsNr);
1634}
1635/**
1636 * nsPop:
1637 * @ctxt: an XML parser context
1638 * @nr: the number to pop
1639 *
1640 * Pops the top @nr parser prefix/namespace from the ns stack
1641 *
1642 * Returns the number of namespaces removed
1643 */
1644static int
1645nsPop(xmlParserCtxtPtr ctxt, int nr)
1646{
1647 int i;
1648
1649 if (ctxt->nsTab == NULL) return(0);
1650 if (ctxt->nsNr < nr) {
1651 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1652 nr = ctxt->nsNr;
1653 }
1654 if (ctxt->nsNr <= 0)
1655 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001656
Daniel Veillard0fb18932003-09-07 09:14:37 +00001657 for (i = 0;i < nr;i++) {
1658 ctxt->nsNr--;
1659 ctxt->nsTab[ctxt->nsNr] = NULL;
1660 }
1661 return(nr);
1662}
1663#endif
1664
1665static int
1666xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1667 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001668 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001669 int maxatts;
1670
1671 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001672 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001673 atts = (const xmlChar **)
1674 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001675 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001676 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001677 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1678 if (attallocs == NULL) goto mem_error;
1679 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001680 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001681 } else if (nr + 5 > ctxt->maxatts) {
1682 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001683 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1684 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001685 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001686 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001687 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1688 (maxatts / 5) * sizeof(int));
1689 if (attallocs == NULL) goto mem_error;
1690 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001691 ctxt->maxatts = maxatts;
1692 }
1693 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001694mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001695 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001696 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001697}
1698
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001699/**
1700 * inputPush:
1701 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001702 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001703 *
1704 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001705 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001706 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001707 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001708int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001709inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1710{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001711 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001712 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001713 if (ctxt->inputNr >= ctxt->inputMax) {
1714 ctxt->inputMax *= 2;
1715 ctxt->inputTab =
1716 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1717 ctxt->inputMax *
1718 sizeof(ctxt->inputTab[0]));
1719 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001720 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001721 xmlFreeInputStream(value);
1722 ctxt->inputMax /= 2;
1723 value = NULL;
1724 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001725 }
1726 }
1727 ctxt->inputTab[ctxt->inputNr] = value;
1728 ctxt->input = value;
1729 return (ctxt->inputNr++);
1730}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001731/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001732 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001733 * @ctxt: an XML parser context
1734 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001736 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001737 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001738 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001739xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001740inputPop(xmlParserCtxtPtr ctxt)
1741{
1742 xmlParserInputPtr ret;
1743
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001744 if (ctxt == NULL)
1745 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001746 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001747 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001748 ctxt->inputNr--;
1749 if (ctxt->inputNr > 0)
1750 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1751 else
1752 ctxt->input = NULL;
1753 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001754 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001755 return (ret);
1756}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001757/**
1758 * nodePush:
1759 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001760 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001761 *
1762 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001763 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001764 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001765 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001766int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001767nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1768{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001769 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001770 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001771 xmlNodePtr *tmp;
1772
1773 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1774 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001775 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001776 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001777 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001778 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001779 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001780 ctxt->nodeTab = tmp;
1781 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001782 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001783 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1784 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001785 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001786 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001787 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001788 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001789 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001790 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001791 ctxt->nodeTab[ctxt->nodeNr] = value;
1792 ctxt->node = value;
1793 return (ctxt->nodeNr++);
1794}
Daniel Veillard8915c152008-08-26 13:05:34 +00001795
Daniel Veillard1c732d22002-11-30 11:22:59 +00001796/**
1797 * nodePop:
1798 * @ctxt: an XML parser context
1799 *
1800 * Pops the top element node from the node stack
1801 *
1802 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001803 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001804xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001805nodePop(xmlParserCtxtPtr ctxt)
1806{
1807 xmlNodePtr ret;
1808
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001809 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001810 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001811 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001812 ctxt->nodeNr--;
1813 if (ctxt->nodeNr > 0)
1814 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1815 else
1816 ctxt->node = NULL;
1817 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001818 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001819 return (ret);
1820}
Daniel Veillarda2351322004-06-27 12:08:10 +00001821
1822#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001823/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001824 * nameNsPush:
1825 * @ctxt: an XML parser context
1826 * @value: the element name
1827 * @prefix: the element prefix
1828 * @URI: the element namespace name
1829 *
1830 * Pushes a new element name/prefix/URL on top of the name stack
1831 *
1832 * Returns -1 in case of error, the index in the stack otherwise
1833 */
1834static int
1835nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1836 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1837{
1838 if (ctxt->nameNr >= ctxt->nameMax) {
1839 const xmlChar * *tmp;
1840 void **tmp2;
1841 ctxt->nameMax *= 2;
1842 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1843 ctxt->nameMax *
1844 sizeof(ctxt->nameTab[0]));
1845 if (tmp == NULL) {
1846 ctxt->nameMax /= 2;
1847 goto mem_error;
1848 }
1849 ctxt->nameTab = tmp;
1850 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1851 ctxt->nameMax * 3 *
1852 sizeof(ctxt->pushTab[0]));
1853 if (tmp2 == NULL) {
1854 ctxt->nameMax /= 2;
1855 goto mem_error;
1856 }
1857 ctxt->pushTab = tmp2;
1858 }
1859 ctxt->nameTab[ctxt->nameNr] = value;
1860 ctxt->name = value;
1861 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1862 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Nick Wellnhoferd422b952017-10-09 13:37:42 +02001863 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001864 return (ctxt->nameNr++);
1865mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001866 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001867 return (-1);
1868}
1869/**
1870 * nameNsPop:
1871 * @ctxt: an XML parser context
1872 *
1873 * Pops the top element/prefix/URI name from the name stack
1874 *
1875 * Returns the name just removed
1876 */
1877static const xmlChar *
1878nameNsPop(xmlParserCtxtPtr ctxt)
1879{
1880 const xmlChar *ret;
1881
1882 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001883 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001884 ctxt->nameNr--;
1885 if (ctxt->nameNr > 0)
1886 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1887 else
1888 ctxt->name = NULL;
1889 ret = ctxt->nameTab[ctxt->nameNr];
1890 ctxt->nameTab[ctxt->nameNr] = NULL;
1891 return (ret);
1892}
Daniel Veillarda2351322004-06-27 12:08:10 +00001893#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001894
1895/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001896 * namePush:
1897 * @ctxt: an XML parser context
1898 * @value: the element name
1899 *
1900 * Pushes a new element name on top of the name stack
1901 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001902 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001903 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001904int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001905namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001906{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001907 if (ctxt == NULL) return (-1);
1908
Daniel Veillard1c732d22002-11-30 11:22:59 +00001909 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001910 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001911 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001912 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001913 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001914 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001915 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001916 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001917 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001918 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001919 }
1920 ctxt->nameTab[ctxt->nameNr] = value;
1921 ctxt->name = value;
1922 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001923mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001924 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001925 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001926}
1927/**
1928 * namePop:
1929 * @ctxt: an XML parser context
1930 *
1931 * Pops the top element name from the name stack
1932 *
1933 * Returns the name just removed
1934 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001935const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001936namePop(xmlParserCtxtPtr ctxt)
1937{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001938 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001939
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001940 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1941 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001942 ctxt->nameNr--;
1943 if (ctxt->nameNr > 0)
1944 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1945 else
1946 ctxt->name = NULL;
1947 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001948 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001949 return (ret);
1950}
Owen Taylor3473f882001-02-23 17:55:21 +00001951
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001952static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001953 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001954 int *tmp;
1955
Owen Taylor3473f882001-02-23 17:55:21 +00001956 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001957 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1958 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1959 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001960 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001961 ctxt->spaceMax /=2;
1962 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001963 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001964 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001965 }
1966 ctxt->spaceTab[ctxt->spaceNr] = val;
1967 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1968 return(ctxt->spaceNr++);
1969}
1970
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001971static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001972 int ret;
1973 if (ctxt->spaceNr <= 0) return(0);
1974 ctxt->spaceNr--;
1975 if (ctxt->spaceNr > 0)
1976 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1977 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001978 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001979 ret = ctxt->spaceTab[ctxt->spaceNr];
1980 ctxt->spaceTab[ctxt->spaceNr] = -1;
1981 return(ret);
1982}
1983
1984/*
1985 * Macros for accessing the content. Those should be used only by the parser,
1986 * and not exported.
1987 *
 * Dirty macros, i.e. one often needs to make assumptions about the
 * context in order to use them
1990 *
1991 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1992 * To be used with extreme caution since operations consuming
1993 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
1998 * RAW same as CUR but in the input buffer, bypass any token
1999 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
2002 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00002003 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar; must only be used to skip one non-newline ASCII
 *           defined char within the parser (the macro takes no argument).
Owen Taylor3473f882001-02-23 17:55:21 +00002006 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2007 *
2008 * NEXT Skip to the next character, this does the proper decoding
2009 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00002010 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00002011 * CUR_CHAR(l) returns the current unicode character (int), set l
2012 * to the number of xmlChars used for the encoding [0-5].
2013 * CUR_SCHAR same but operate on a string instead of the context
2014 * COPY_BUF copy the current unicode char to the target buffer, increment
2015 * the index
2016 * GROW, SHRINK handling of input buffers
2017 */
2018
/*
 * Raw accessors on the current input. They all expect a variable named
 * ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the n bytes starting at s are exactly
 * c1 ... cn. The comparison is done byte by byte through an unsigned
 * char pointer and stops at the first mismatch; the macros themselves
 * do no bound checking. Arguments are parenthesized so that expressions
 * can be passed safely, and the cast keeps the const qualifier.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advance the input by val xmlChars, updating nbChars and
 * the column by the same amount (the line counter is not touched), then
 * ask for more input if the new current byte is 0.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the input by val xmlChars one at a time, keeping
 * the line and column counters in sync when a newline is crossed, then
 * ask for more input if the new current byte is 0.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2061
Daniel Veillarda880b122003-04-21 21:36:41 +00002062#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002063 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2064 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002065 xmlSHRINK (ctxt);
2066
2067static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2068 xmlParserInputShrink(ctxt->input);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002069 if (*ctxt->input->cur == 0)
2070 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2071}
Owen Taylor3473f882001-02-23 17:55:21 +00002072
Daniel Veillarda880b122003-04-21 21:36:41 +00002073#define GROW if ((ctxt->progressive == 0) && \
2074 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002075 xmlGROW (ctxt);
2076
2077static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002078 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2079 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2080
2081 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2082 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Vlad Tsyrklevich28f52fe2017-08-10 15:08:48 -07002083 ((ctxt->input->buf) &&
2084 (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002085 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2086 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002087 xmlHaltParser(ctxt);
2088 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002089 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002090 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002091 if ((ctxt->input->cur > ctxt->input->end) ||
2092 (ctxt->input->cur < ctxt->input->base)) {
2093 xmlHaltParser(ctxt);
2094 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2095 return;
2096 }
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002097 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2098 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillarda880b122003-04-21 21:36:41 +00002099}
Owen Taylor3473f882001-02-23 17:55:21 +00002100
/* Shorthands for the character level routines, applied to ctxt. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, bumping the column and nbChars,
 * then ask for more input if the new current byte is 0.
 * NOTE: expands to a bare brace block, not a do { } while (0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance by l xmlChars, counting them as one character for
 * the line/column bookkeeping. nbChars is not updated by this macro.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* l must be an lvalue: its address is passed to receive the length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store character v at b[i] and advance i, using a
 * plain byte store when l is 1 and xmlCopyCharMultiByte() otherwise.
 * NOTE: expands to a bare if/else without a trailing semicolon; i is
 * modified and must be an lvalue.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002126
2127/**
2128 * xmlSkipBlankChars:
2129 * @ctxt: the XML parser context
2130 *
2131 * skip all blanks character found at that point in the input streams.
2132 * It pops up finished entities in the process if allowable at that point.
2133 *
2134 * Returns the number of space chars skipped
2135 */
2136
2137int
2138xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002139 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002140
2141 /*
2142 * It's Okay to use CUR/NEXT here since all the blanks are on
2143 * the ASCII range.
2144 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002145 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2146 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002147 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002148 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002149 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002150 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002151 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002152 if (*cur == '\n') {
2153 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002154 } else {
2155 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002156 }
2157 cur++;
2158 res++;
2159 if (*cur == 0) {
2160 ctxt->input->cur = cur;
2161 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2162 cur = ctxt->input->cur;
2163 }
2164 }
2165 ctxt->input->cur = cur;
2166 } else {
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002167 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2168
2169 while (1) {
2170 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002171 NEXT;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002172 } else if (CUR == '%') {
2173 /*
2174 * Need to handle support of entities branching here
2175 */
2176 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2177 break;
2178 xmlParsePEReference(ctxt);
2179 } else if (CUR == 0) {
2180 if (ctxt->inputNr <= 1)
2181 break;
2182 xmlPopInput(ctxt);
2183 } else {
2184 break;
2185 }
Nick Wellnhofer872fea92017-06-19 00:24:12 +02002186
2187 /*
2188 * Also increase the counter when entering or exiting a PERef.
2189 * The spec says: "When a parameter-entity reference is recognized
2190 * in the DTD and included, its replacement text MUST be enlarged
2191 * by the attachment of one leading and one following space (#x20)
2192 * character."
2193 */
2194 res++;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002195 }
Daniel Veillard02141ea2001-04-30 11:46:40 +00002196 }
Owen Taylor3473f882001-02-23 17:55:21 +00002197 return(res);
2198}
2199
2200/************************************************************************
2201 * *
2202 * Commodity functions to handle entities *
2203 * *
2204 ************************************************************************/
2205
2206/**
2207 * xmlPopInput:
2208 * @ctxt: an XML parser context
2209 *
2210 * xmlPopInput: the current input pointed by ctxt->input came to an end
2211 * pop it and return the next char.
2212 *
2213 * Returns the current xmlChar in the parser context
2214 */
2215xmlChar
2216xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002217 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002218 if (xmlParserDebugEntities)
2219 xmlGenericError(xmlGenericErrorContext,
2220 "Popping input %d\n", ctxt->inputNr);
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02002221 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2222 (ctxt->instate != XML_PARSER_EOF))
2223 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2224 "Unfinished entity outside the DTD");
Owen Taylor3473f882001-02-23 17:55:21 +00002225 xmlFreeInputStream(inputPop(ctxt));
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002226 if (*ctxt->input->cur == 0)
2227 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00002228 return(CUR);
2229}
2230
2231/**
2232 * xmlPushInput:
2233 * @ctxt: an XML parser context
2234 * @input: an XML parser input fragment (entity, XML fragment ...).
2235 *
2236 * xmlPushInput: switch to a new input stream which is stacked on top
2237 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002238 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002239 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002240int
Owen Taylor3473f882001-02-23 17:55:21 +00002241xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002242 int ret;
2243 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002244
2245 if (xmlParserDebugEntities) {
2246 if ((ctxt->input != NULL) && (ctxt->input->filename))
2247 xmlGenericError(xmlGenericErrorContext,
2248 "%s(%d): ", ctxt->input->filename,
2249 ctxt->input->line);
2250 xmlGenericError(xmlGenericErrorContext,
2251 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2252 }
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02002253 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2254 (ctxt->inputNr > 1024)) {
2255 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2256 while (ctxt->inputNr > 1)
2257 xmlFreeInputStream(inputPop(ctxt));
2258 return(-1);
2259 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002260 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002261 if (ctxt->instate == XML_PARSER_EOF)
2262 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002263 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002264 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002265}
2266
2267/**
2268 * xmlParseCharRef:
2269 * @ctxt: an XML parser context
2270 *
2271 * parse Reference declarations
2272 *
2273 * [66] CharRef ::= '&#' [0-9]+ ';' |
2274 * '&#x' [0-9a-fA-F]+ ';'
2275 *
2276 * [ WFC: Legal Character ]
2277 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002278 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002279 *
2280 * Returns the value parsed (as an int), 0 in case of error
2281 */
2282int
2283xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002284 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002285 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002286 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002287
Owen Taylor3473f882001-02-23 17:55:21 +00002288 /*
2289 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2290 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002291 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002292 (NXT(2) == 'x')) {
2293 SKIP(3);
2294 GROW;
2295 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002296 if (count++ > 20) {
2297 count = 0;
2298 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002299 if (ctxt->instate == XML_PARSER_EOF)
2300 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002301 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002302 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002303 val = val * 16 + (CUR - '0');
2304 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2305 val = val * 16 + (CUR - 'a') + 10;
2306 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2307 val = val * 16 + (CUR - 'A') + 10;
2308 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002309 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002310 val = 0;
2311 break;
2312 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002313 if (val > 0x10FFFF)
2314 outofrange = val;
2315
Owen Taylor3473f882001-02-23 17:55:21 +00002316 NEXT;
2317 count++;
2318 }
2319 if (RAW == ';') {
2320 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002321 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002322 ctxt->nbChars ++;
2323 ctxt->input->cur++;
2324 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002325 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002326 SKIP(2);
2327 GROW;
2328 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002329 if (count++ > 20) {
2330 count = 0;
2331 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002332 if (ctxt->instate == XML_PARSER_EOF)
2333 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002334 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002335 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002336 val = val * 10 + (CUR - '0');
2337 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002338 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002339 val = 0;
2340 break;
2341 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002342 if (val > 0x10FFFF)
2343 outofrange = val;
2344
Owen Taylor3473f882001-02-23 17:55:21 +00002345 NEXT;
2346 count++;
2347 }
2348 if (RAW == ';') {
2349 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002350 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002351 ctxt->nbChars ++;
2352 ctxt->input->cur++;
2353 }
2354 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002355 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002356 }
2357
2358 /*
2359 * [ WFC: Legal Character ]
2360 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002361 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002362 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002363 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002364 return(val);
2365 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002366 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2367 "xmlParseCharRef: invalid xmlChar value %d\n",
2368 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002369 }
2370 return(0);
2371}
2372
2373/**
2374 * xmlParseStringCharRef:
2375 * @ctxt: an XML parser context
2376 * @str: a pointer to an index in the string
2377 *
2378 * parse Reference declarations, variant parsing from a string rather
2379 * than an an input flow.
2380 *
2381 * [66] CharRef ::= '&#' [0-9]+ ';' |
2382 * '&#x' [0-9a-fA-F]+ ';'
2383 *
2384 * [ WFC: Legal Character ]
2385 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002386 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002387 *
2388 * Returns the value parsed (as an int), 0 in case of error, str will be
2389 * updated to the current value of the index
2390 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002391static int
Owen Taylor3473f882001-02-23 17:55:21 +00002392xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2393 const xmlChar *ptr;
2394 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002395 unsigned int val = 0;
2396 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002397
2398 if ((str == NULL) || (*str == NULL)) return(0);
2399 ptr = *str;
2400 cur = *ptr;
2401 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2402 ptr += 3;
2403 cur = *ptr;
2404 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002405 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002406 val = val * 16 + (cur - '0');
2407 else if ((cur >= 'a') && (cur <= 'f'))
2408 val = val * 16 + (cur - 'a') + 10;
2409 else if ((cur >= 'A') && (cur <= 'F'))
2410 val = val * 16 + (cur - 'A') + 10;
2411 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002412 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002413 val = 0;
2414 break;
2415 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002416 if (val > 0x10FFFF)
2417 outofrange = val;
2418
Owen Taylor3473f882001-02-23 17:55:21 +00002419 ptr++;
2420 cur = *ptr;
2421 }
2422 if (cur == ';')
2423 ptr++;
2424 } else if ((cur == '&') && (ptr[1] == '#')){
2425 ptr += 2;
2426 cur = *ptr;
2427 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002428 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002429 val = val * 10 + (cur - '0');
2430 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002431 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002432 val = 0;
2433 break;
2434 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002435 if (val > 0x10FFFF)
2436 outofrange = val;
2437
Owen Taylor3473f882001-02-23 17:55:21 +00002438 ptr++;
2439 cur = *ptr;
2440 }
2441 if (cur == ';')
2442 ptr++;
2443 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002444 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002445 return(0);
2446 }
2447 *str = ptr;
2448
2449 /*
2450 * [ WFC: Legal Character ]
2451 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002452 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002453 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002454 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002455 return(val);
2456 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002457 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2458 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2459 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002460 }
2461 return(0);
2462}
2463
2464/**
2465 * xmlParserHandlePEReference:
2466 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002467 *
Owen Taylor3473f882001-02-23 17:55:21 +00002468 * [69] PEReference ::= '%' Name ';'
2469 *
2470 * [ WFC: No Recursion ]
2471 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002472 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002473 *
2474 * [ WFC: Entity Declared ]
2475 * In a document without any DTD, a document with only an internal DTD
2476 * subset which contains no parameter entity references, or a document
2477 * with "standalone='yes'", ... ... The declaration of a parameter
2478 * entity must precede any reference to it...
2479 *
2480 * [ VC: Entity Declared ]
2481 * In a document with an external subset or external parameter entities
2482 * with "standalone='no'", ... ... The declaration of a parameter entity
2483 * must precede any reference to it...
2484 *
2485 * [ WFC: In DTD ]
2486 * Parameter-entity references may only appear in the DTD.
2487 * NOTE: misleading but this is handled.
2488 *
2489 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002490 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002491 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002492 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002493 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002494 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002495 */
2496void
2497xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00002498 switch(ctxt->instate) {
2499 case XML_PARSER_CDATA_SECTION:
2500 return;
2501 case XML_PARSER_COMMENT:
2502 return;
2503 case XML_PARSER_START_TAG:
2504 return;
2505 case XML_PARSER_END_TAG:
2506 return;
2507 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002508 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002509 return;
2510 case XML_PARSER_PROLOG:
2511 case XML_PARSER_START:
2512 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002513 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002514 return;
2515 case XML_PARSER_ENTITY_DECL:
2516 case XML_PARSER_CONTENT:
2517 case XML_PARSER_ATTRIBUTE_VALUE:
2518 case XML_PARSER_PI:
2519 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002520 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002521 /* we just ignore it there */
2522 return;
2523 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002524 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002525 return;
2526 case XML_PARSER_ENTITY_VALUE:
2527 /*
2528 * NOTE: in the case of entity values, we don't do the
2529 * substitution here since we need the literal
2530 * entity value to be able to save the internal
2531 * subset of the document.
2532 * This will be handled by xmlStringDecodeEntities
2533 */
2534 return;
2535 case XML_PARSER_DTD:
2536 /*
2537 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2538 * In the internal DTD subset, parameter-entity references
2539 * can occur only where markup declarations can occur, not
2540 * within markup declarations.
2541 * In that case this is handled in xmlParseMarkupDecl
2542 */
2543 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2544 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002545 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002546 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002547 break;
2548 case XML_PARSER_IGNORE:
2549 return;
2550 }
2551
Nick Wellnhofer03904152017-06-05 21:16:00 +02002552 xmlParsePEReference(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00002553}
2554
/*
 * Macro used to grow the current buffer: the new size is twice the
 * current one plus n.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also the target when the new size wraps around. On failure buffer and
 * buffer##_size are left untouched.
 * The expansion is a single statement and needs a trailing semicolon.
 */
#define growBuffer(buffer, n) do {                                      \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
} while (0)
2569
2570/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002571 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002572 * @ctxt: the parser context
2573 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002574 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002575 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2576 * @end: an end marker xmlChar, 0 if none
2577 * @end2: an end marker xmlChar, 0 if none
2578 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002579 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002580 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002581 *
2582 * [67] Reference ::= EntityRef | CharRef
2583 *
2584 * [69] PEReference ::= '%' Name ';'
2585 *
2586 * Returns A newly allocated string with the substitution done. The caller
2587 * must deallocate it !
2588 */
2589xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002590xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2591 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002592 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002593 size_t buffer_size = 0;
2594 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002595
2596 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002597 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002598 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002599 xmlEntityPtr ent;
2600 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002601
Daniel Veillarda82b1822004-11-08 16:24:57 +00002602 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002603 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002604 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002605
Daniel Veillard0161e632008-08-28 15:36:32 +00002606 if (((ctxt->depth > 40) &&
2607 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2608 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002609 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002610 return(NULL);
2611 }
2612
2613 /*
2614 * allocate a translation buffer.
2615 */
2616 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002617 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002618 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002619
2620 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002621 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002622 * we are operating on already parsed values.
2623 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002624 if (str < last)
2625 c = CUR_SCHAR(str, l);
2626 else
2627 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002628 while ((c != 0) && (c != end) && /* non input consuming loop */
2629 (c != end2) && (c != end3)) {
2630
2631 if (c == 0) break;
2632 if ((c == '&') && (str[1] == '#')) {
2633 int val = xmlParseStringCharRef(ctxt, &str);
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002634 if (val == 0)
2635 goto int_error;
2636 COPY_BUF(0,buffer,nbchars,val);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002637 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002638 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002639 }
Owen Taylor3473f882001-02-23 17:55:21 +00002640 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2641 if (xmlParserDebugEntities)
2642 xmlGenericError(xmlGenericErrorContext,
2643 "String decoding Entity Reference: %.30s\n",
2644 str);
2645 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002646 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002647 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002648 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002649 if ((ent != NULL) &&
2650 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2651 if (ent->content != NULL) {
2652 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002653 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002654 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002655 }
Owen Taylor3473f882001-02-23 17:55:21 +00002656 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002657 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2658 "predefined entity has no content\n");
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002659 goto int_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002660 }
2661 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002662 ctxt->depth++;
2663 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2664 0, 0, 0);
2665 ctxt->depth--;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002666 if (rep == NULL)
2667 goto int_error;
Daniel Veillard0161e632008-08-28 15:36:32 +00002668
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002669 current = rep;
2670 while (*current != 0) { /* non input consuming loop */
2671 buffer[nbchars++] = *current++;
2672 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2673 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2674 goto int_error;
2675 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2676 }
2677 }
2678 xmlFree(rep);
2679 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002680 } else if (ent != NULL) {
2681 int i = xmlStrlen(ent->name);
2682 const xmlChar *cur = ent->name;
2683
2684 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002685 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002686 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002687 }
2688 for (;i > 0;i--)
2689 buffer[nbchars++] = *cur++;
2690 buffer[nbchars++] = ';';
2691 }
2692 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2693 if (xmlParserDebugEntities)
2694 xmlGenericError(xmlGenericErrorContext,
2695 "String decoding PE Reference: %.30s\n", str);
2696 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002697 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002698 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002699 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002700 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002701 if (ent->content == NULL) {
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002702 /*
2703 * Note: external parsed entities will not be loaded,
2704 * it is not required for a non-validating parser to
2705 * complete external PEreferences coming from the
2706 * internal subset
2707 */
2708 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2709 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2710 (ctxt->validate != 0)) {
2711 xmlLoadEntityContent(ctxt, ent);
2712 } else {
2713 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2714 "not validating will not read content for PE entity %s\n",
2715 ent->name, NULL);
2716 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002717 }
Owen Taylor3473f882001-02-23 17:55:21 +00002718 ctxt->depth++;
2719 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2720 0, 0, 0);
2721 ctxt->depth--;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002722 if (rep == NULL)
2723 goto int_error;
2724 current = rep;
2725 while (*current != 0) { /* non input consuming loop */
2726 buffer[nbchars++] = *current++;
2727 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2728 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2729 goto int_error;
2730 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2731 }
2732 }
2733 xmlFree(rep);
2734 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002735 }
2736 } else {
2737 COPY_BUF(l,buffer,nbchars,c);
2738 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002739 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2740 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002741 }
2742 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002743 if (str < last)
2744 c = CUR_SCHAR(str, l);
2745 else
2746 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002747 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002748 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002749 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002750
2751mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002752 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002753int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002754 if (rep != NULL)
2755 xmlFree(rep);
2756 if (buffer != NULL)
2757 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002758 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002759}
2760
Daniel Veillarde57ec792003-09-10 10:50:59 +00002761/**
2762 * xmlStringDecodeEntities:
2763 * @ctxt: the parser context
2764 * @str: the input string
2765 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2766 * @end: an end marker xmlChar, 0 if none
2767 * @end2: an end marker xmlChar, 0 if none
2768 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002769 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002770 * Takes a entity string content and process to do the adequate substitutions.
2771 *
2772 * [67] Reference ::= EntityRef | CharRef
2773 *
2774 * [69] PEReference ::= '%' Name ';'
2775 *
2776 * Returns A newly allocated string with the substitution done. The caller
2777 * must deallocate it !
2778 */
2779xmlChar *
2780xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2781 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002782 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002783 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2784 end, end2, end3));
2785}
Owen Taylor3473f882001-02-23 17:55:21 +00002786
2787/************************************************************************
2788 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002789 * Commodity functions, cleanup needed ? *
2790 * *
2791 ************************************************************************/
2792
2793/**
2794 * areBlanks:
2795 * @ctxt: an XML parser context
2796 * @str: a xmlChar *
2797 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002798 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002799 *
2800 * Is this a sequence of blank chars that one can ignore ?
2801 *
2802 * Returns 1 if ignorable 0 otherwise.
2803 */
2804
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002805static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2806 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002807 int i, ret;
2808 xmlNodePtr lastChild;
2809
Daniel Veillard05c13a22001-09-09 08:38:09 +00002810 /*
2811 * Don't spend time trying to differentiate them, the same callback is
2812 * used !
2813 */
2814 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002815 return(0);
2816
Owen Taylor3473f882001-02-23 17:55:21 +00002817 /*
2818 * Check for xml:space value.
2819 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002820 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2821 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002822 return(0);
2823
2824 /*
2825 * Check that the string is made of blanks
2826 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002827 if (blank_chars == 0) {
2828 for (i = 0;i < len;i++)
2829 if (!(IS_BLANK_CH(str[i]))) return(0);
2830 }
Owen Taylor3473f882001-02-23 17:55:21 +00002831
2832 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002833 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002834 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002835 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002836 if (ctxt->myDoc != NULL) {
2837 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2838 if (ret == 0) return(1);
2839 if (ret == 1) return(0);
2840 }
2841
2842 /*
2843 * Otherwise, heuristic :-\
2844 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002845 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002846 if ((ctxt->node->children == NULL) &&
2847 (RAW == '<') && (NXT(1) == '/')) return(0);
2848
2849 lastChild = xmlGetLastChild(ctxt->node);
2850 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002851 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2852 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002853 } else if (xmlNodeIsText(lastChild))
2854 return(0);
2855 else if ((ctxt->node->children != NULL) &&
2856 (xmlNodeIsText(ctxt->node->children)))
2857 return(0);
2858 return(1);
2859}
2860
Owen Taylor3473f882001-02-23 17:55:21 +00002861/************************************************************************
2862 * *
2863 * Extra stuff for namespace support *
2864 * Relates to http://www.w3.org/TR/WD-xml-names *
2865 * *
2866 ************************************************************************/
2867
2868/**
2869 * xmlSplitQName:
2870 * @ctxt: an XML parser context
2871 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002872 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002873 *
2874 * parse an UTF8 encoded XML qualified name string
2875 *
2876 * [NS 5] QName ::= (Prefix ':')? LocalPart
2877 *
2878 * [NS 6] Prefix ::= NCName
2879 *
2880 * [NS 7] LocalPart ::= NCName
2881 *
2882 * Returns the local part, and prefix is updated
2883 * to get the Prefix if any.
2884 */
2885
2886xmlChar *
2887xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2888 xmlChar buf[XML_MAX_NAMELEN + 5];
2889 xmlChar *buffer = NULL;
2890 int len = 0;
2891 int max = XML_MAX_NAMELEN;
2892 xmlChar *ret = NULL;
2893 const xmlChar *cur = name;
2894 int c;
2895
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002896 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002897 *prefix = NULL;
2898
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002899 if (cur == NULL) return(NULL);
2900
Owen Taylor3473f882001-02-23 17:55:21 +00002901#ifndef XML_XML_NAMESPACE
2902 /* xml: prefix is not really a namespace */
2903 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2904 (cur[2] == 'l') && (cur[3] == ':'))
2905 return(xmlStrdup(name));
2906#endif
2907
Daniel Veillard597bc482003-07-24 16:08:28 +00002908 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002909 if (cur[0] == ':')
2910 return(xmlStrdup(name));
2911
2912 c = *cur++;
2913 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2914 buf[len++] = c;
2915 c = *cur++;
2916 }
2917 if (len >= max) {
2918 /*
2919 * Okay someone managed to make a huge name, so he's ready to pay
2920 * for the processing speed.
2921 */
2922 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002923
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002924 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002925 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002926 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002927 return(NULL);
2928 }
2929 memcpy(buffer, buf, len);
2930 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2931 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002932 xmlChar *tmp;
2933
Owen Taylor3473f882001-02-23 17:55:21 +00002934 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002935 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002936 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002937 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002938 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002939 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002940 return(NULL);
2941 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002942 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002943 }
2944 buffer[len++] = c;
2945 c = *cur++;
2946 }
2947 buffer[len] = 0;
2948 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002949
Daniel Veillard597bc482003-07-24 16:08:28 +00002950 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002951 if (buffer != NULL)
2952 xmlFree(buffer);
2953 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002954 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002955 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002956
Owen Taylor3473f882001-02-23 17:55:21 +00002957 if (buffer == NULL)
2958 ret = xmlStrndup(buf, len);
2959 else {
2960 ret = buffer;
2961 buffer = NULL;
2962 max = XML_MAX_NAMELEN;
2963 }
2964
2965
2966 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002967 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002968 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002969 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002970 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002971 }
Owen Taylor3473f882001-02-23 17:55:21 +00002972 len = 0;
2973
Daniel Veillardbb284f42002-10-16 18:02:47 +00002974 /*
2975 * Check that the first character is proper to start
2976 * a new name
2977 */
2978 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2979 ((c >= 0x41) && (c <= 0x5A)) ||
2980 (c == '_') || (c == ':'))) {
2981 int l;
2982 int first = CUR_SCHAR(cur, l);
2983
2984 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002985 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002986 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002987 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002988 }
2989 }
2990 cur++;
2991
Owen Taylor3473f882001-02-23 17:55:21 +00002992 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2993 buf[len++] = c;
2994 c = *cur++;
2995 }
2996 if (len >= max) {
2997 /*
2998 * Okay someone managed to make a huge name, so he's ready to pay
2999 * for the processing speed.
3000 */
3001 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003002
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003003 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003004 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003005 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003006 return(NULL);
3007 }
3008 memcpy(buffer, buf, len);
3009 while (c != 0) { /* tested bigname2.xml */
3010 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003011 xmlChar *tmp;
3012
Owen Taylor3473f882001-02-23 17:55:21 +00003013 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003014 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003015 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003016 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003017 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003018 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003019 return(NULL);
3020 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003021 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003022 }
3023 buffer[len++] = c;
3024 c = *cur++;
3025 }
3026 buffer[len] = 0;
3027 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003028
Owen Taylor3473f882001-02-23 17:55:21 +00003029 if (buffer == NULL)
3030 ret = xmlStrndup(buf, len);
3031 else {
3032 ret = buffer;
3033 }
3034 }
3035
3036 return(ret);
3037}
3038
3039/************************************************************************
3040 * *
3041 * The parser itself *
3042 * Relates to http://www.w3.org/TR/REC-xml *
3043 * *
3044 ************************************************************************/
3045
Daniel Veillard34e3f642008-07-29 09:02:27 +00003046/************************************************************************
3047 * *
3048 * Routines to parse Name, NCName and NmToken *
3049 * *
3050 ************************************************************************/
/*
 * Call counters for the name parsing routines of this section. They only
 * exist when DEBUG is defined; each routine increments its own counter
 * under the same #ifdef.
 */
Daniel Veillardc6561462009-03-25 10:22:31 +00003051#ifdef DEBUG
3052static unsigned long nbParseName = 0;
3053static unsigned long nbParseNmToken = 0;
3054static unsigned long nbParseNCName = 0;
3055static unsigned long nbParseNCNameComplex = 0;
3056static unsigned long nbParseNameComplex = 0;
3057static unsigned long nbParseStringName = 0;
3058#endif
3059
Daniel Veillard34e3f642008-07-29 09:02:27 +00003060/*
3061 * The two following functions are related to the change of accepted
3062 * characters for Name and NmToken in the Revision 5 of XML-1.0
3063 * They correspond to the modified production [4] and the new production [4a]
3064 * changes in that revision. Also note that the macros used for the
3065 * productions Letter, Digit, CombiningChar and Extender are not needed
3066 * anymore.
3067 * We still keep compatibility to pre-revision5 parsing semantic if the
3068 * new XML_PARSE_OLD10 option is given to the parser.
3069 */
3070static int
3071xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3072 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3073 /*
3074 * Use the new checks of production [4] [4a] amd [5] of the
3075 * Update 5 of XML-1.0
3076 */
3077 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3078 (((c >= 'a') && (c <= 'z')) ||
3079 ((c >= 'A') && (c <= 'Z')) ||
3080 (c == '_') || (c == ':') ||
3081 ((c >= 0xC0) && (c <= 0xD6)) ||
3082 ((c >= 0xD8) && (c <= 0xF6)) ||
3083 ((c >= 0xF8) && (c <= 0x2FF)) ||
3084 ((c >= 0x370) && (c <= 0x37D)) ||
3085 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3086 ((c >= 0x200C) && (c <= 0x200D)) ||
3087 ((c >= 0x2070) && (c <= 0x218F)) ||
3088 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3089 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3090 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3091 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3092 ((c >= 0x10000) && (c <= 0xEFFFF))))
3093 return(1);
3094 } else {
3095 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3096 return(1);
3097 }
3098 return(0);
3099}
3100
3101static int
3102xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3103 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3104 /*
3105 * Use the new checks of production [4] [4a] amd [5] of the
3106 * Update 5 of XML-1.0
3107 */
3108 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3109 (((c >= 'a') && (c <= 'z')) ||
3110 ((c >= 'A') && (c <= 'Z')) ||
3111 ((c >= '0') && (c <= '9')) || /* !start */
3112 (c == '_') || (c == ':') ||
3113 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3114 ((c >= 0xC0) && (c <= 0xD6)) ||
3115 ((c >= 0xD8) && (c <= 0xF6)) ||
3116 ((c >= 0xF8) && (c <= 0x2FF)) ||
3117 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3118 ((c >= 0x370) && (c <= 0x37D)) ||
3119 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3120 ((c >= 0x200C) && (c <= 0x200D)) ||
3121 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3122 ((c >= 0x2070) && (c <= 0x218F)) ||
3123 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3124 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3125 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3126 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3127 ((c >= 0x10000) && (c <= 0xEFFFF))))
3128 return(1);
3129 } else {
3130 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3131 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003132 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003133 (IS_COMBINING(c)) ||
3134 (IS_EXTENDER(c)))
3135 return(1);
3136 }
3137 return(0);
3138}
3139
/*
 * Forward declaration of a static helper whose definition lives elsewhere
 * in this file.
 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00003140static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003141 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003142
Daniel Veillard34e3f642008-07-29 09:02:27 +00003143static const xmlChar *
3144xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3145 int len = 0, l;
3146 int c;
3147 int count = 0;
3148
Daniel Veillardc6561462009-03-25 10:22:31 +00003149#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003150 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003151#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003152
3153 /*
3154 * Handler for more complex cases
3155 */
3156 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003157 if (ctxt->instate == XML_PARSER_EOF)
3158 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003159 c = CUR_CHAR(l);
3160 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3161 /*
3162 * Use the new checks of production [4] [4a] amd [5] of the
3163 * Update 5 of XML-1.0
3164 */
3165 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3166 (!(((c >= 'a') && (c <= 'z')) ||
3167 ((c >= 'A') && (c <= 'Z')) ||
3168 (c == '_') || (c == ':') ||
3169 ((c >= 0xC0) && (c <= 0xD6)) ||
3170 ((c >= 0xD8) && (c <= 0xF6)) ||
3171 ((c >= 0xF8) && (c <= 0x2FF)) ||
3172 ((c >= 0x370) && (c <= 0x37D)) ||
3173 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3174 ((c >= 0x200C) && (c <= 0x200D)) ||
3175 ((c >= 0x2070) && (c <= 0x218F)) ||
3176 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3177 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3178 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3179 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3180 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3181 return(NULL);
3182 }
3183 len += l;
3184 NEXTL(l);
3185 c = CUR_CHAR(l);
3186 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3187 (((c >= 'a') && (c <= 'z')) ||
3188 ((c >= 'A') && (c <= 'Z')) ||
3189 ((c >= '0') && (c <= '9')) || /* !start */
3190 (c == '_') || (c == ':') ||
3191 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3192 ((c >= 0xC0) && (c <= 0xD6)) ||
3193 ((c >= 0xD8) && (c <= 0xF6)) ||
3194 ((c >= 0xF8) && (c <= 0x2FF)) ||
3195 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3196 ((c >= 0x370) && (c <= 0x37D)) ||
3197 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3198 ((c >= 0x200C) && (c <= 0x200D)) ||
3199 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3200 ((c >= 0x2070) && (c <= 0x218F)) ||
3201 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3202 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3203 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3204 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3205 ((c >= 0x10000) && (c <= 0xEFFFF))
3206 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003207 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003208 count = 0;
3209 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003210 if (ctxt->instate == XML_PARSER_EOF)
3211 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003212 }
3213 len += l;
3214 NEXTL(l);
3215 c = CUR_CHAR(l);
3216 }
3217 } else {
3218 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3219 (!IS_LETTER(c) && (c != '_') &&
3220 (c != ':'))) {
3221 return(NULL);
3222 }
3223 len += l;
3224 NEXTL(l);
3225 c = CUR_CHAR(l);
3226
3227 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3228 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3229 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003230 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003231 (IS_COMBINING(c)) ||
3232 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003233 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003234 count = 0;
3235 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003236 if (ctxt->instate == XML_PARSER_EOF)
3237 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003238 }
3239 len += l;
3240 NEXTL(l);
3241 c = CUR_CHAR(l);
3242 }
3243 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003244 if ((len > XML_MAX_NAME_LENGTH) &&
3245 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3246 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3247 return(NULL);
3248 }
Nick Wellnhofere2663052017-06-05 15:37:17 +02003249 if (ctxt->input->cur - ctxt->input->base < len) {
3250 /*
3251 * There were a couple of bugs where PERefs lead to to a change
3252 * of the buffer. Check the buffer size to avoid passing an invalid
3253 * pointer to xmlDictLookup.
3254 */
3255 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3256 "unexpected change of input buffer");
3257 return (NULL);
3258 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003259 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3260 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3261 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3262}
3263
Owen Taylor3473f882001-02-23 17:55:21 +00003264/**
3265 * xmlParseName:
3266 * @ctxt: an XML parser context
3267 *
3268 * parse an XML name.
3269 *
3270 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3271 * CombiningChar | Extender
3272 *
3273 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3274 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003275 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003276 *
3277 * Returns the Name parsed or NULL
3278 */
3279
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003280const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003281xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003282 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003283 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003284 int count = 0;
3285
3286 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003287
Daniel Veillardc6561462009-03-25 10:22:31 +00003288#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003289 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003290#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003291
Daniel Veillard48b2f892001-02-25 16:11:03 +00003292 /*
3293 * Accelerator for simple ASCII names
3294 */
3295 in = ctxt->input->cur;
3296 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3297 ((*in >= 0x41) && (*in <= 0x5A)) ||
3298 (*in == '_') || (*in == ':')) {
3299 in++;
3300 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3301 ((*in >= 0x41) && (*in <= 0x5A)) ||
3302 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003303 (*in == '_') || (*in == '-') ||
3304 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003305 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003306 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003307 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003308 if ((count > XML_MAX_NAME_LENGTH) &&
3309 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3310 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3311 return(NULL);
3312 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003313 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003314 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003315 ctxt->nbChars += count;
3316 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003317 if (ret == NULL)
3318 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003319 return(ret);
3320 }
3321 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003322 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003323 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003324}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003325
Daniel Veillard34e3f642008-07-29 09:02:27 +00003326static const xmlChar *
3327xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3328 int len = 0, l;
3329 int c;
3330 int count = 0;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003331 size_t startPosition = 0;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003332
Daniel Veillardc6561462009-03-25 10:22:31 +00003333#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003334 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003335#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003336
3337 /*
3338 * Handler for more complex cases
3339 */
3340 GROW;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003341 startPosition = CUR_PTR - BASE_PTR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003342 c = CUR_CHAR(l);
3343 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3344 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3345 return(NULL);
3346 }
3347
3348 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3349 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003350 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003351 if ((len > XML_MAX_NAME_LENGTH) &&
3352 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3353 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3354 return(NULL);
3355 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003356 count = 0;
3357 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003358 if (ctxt->instate == XML_PARSER_EOF)
3359 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003360 }
3361 len += l;
3362 NEXTL(l);
3363 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003364 if (c == 0) {
3365 count = 0;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003366 /*
3367 * when shrinking to extend the buffer we really need to preserve
3368 * the part of the name we already parsed. Hence rolling back
3369 * by current lenght.
3370 */
3371 ctxt->input->cur -= l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003372 GROW;
3373 if (ctxt->instate == XML_PARSER_EOF)
3374 return(NULL);
Nick Wellnhofer132af1a2018-01-08 18:48:01 +01003375 ctxt->input->cur += l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003376 c = CUR_CHAR(l);
3377 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003378 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003379 if ((len > XML_MAX_NAME_LENGTH) &&
3380 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3381 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3382 return(NULL);
3383 }
Pranjal Jumde45752d22016-03-03 11:50:34 -08003384 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003385}
3386
3387/**
3388 * xmlParseNCName:
3389 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003390 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003391 *
3392 * parse an XML name.
3393 *
3394 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3395 * CombiningChar | Extender
3396 *
3397 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3398 *
3399 * Returns the Name parsed or NULL
3400 */
3401
3402static const xmlChar *
3403xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003404 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003405 const xmlChar *ret;
3406 int count = 0;
3407
Daniel Veillardc6561462009-03-25 10:22:31 +00003408#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003409 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003410#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003411
3412 /*
3413 * Accelerator for simple ASCII names
3414 */
3415 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003416 e = ctxt->input->end;
3417 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3418 ((*in >= 0x41) && (*in <= 0x5A)) ||
3419 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003420 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003421 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3422 ((*in >= 0x41) && (*in <= 0x5A)) ||
3423 ((*in >= 0x30) && (*in <= 0x39)) ||
3424 (*in == '_') || (*in == '-') ||
3425 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003426 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003427 if (in >= e)
3428 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003429 if ((*in > 0) && (*in < 0x80)) {
3430 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003431 if ((count > XML_MAX_NAME_LENGTH) &&
3432 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3433 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3434 return(NULL);
3435 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003436 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3437 ctxt->input->cur = in;
3438 ctxt->nbChars += count;
3439 ctxt->input->col += count;
3440 if (ret == NULL) {
3441 xmlErrMemory(ctxt, NULL);
3442 }
3443 return(ret);
3444 }
3445 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003446complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003447 return(xmlParseNCNameComplex(ctxt));
3448}
3449
Daniel Veillard46de64e2002-05-29 08:21:33 +00003450/**
3451 * xmlParseNameAndCompare:
3452 * @ctxt: an XML parser context
3453 *
3454 * parse an XML name and compares for match
3455 * (specialized for endtag parsing)
3456 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003457 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3458 * and the name for mismatch
3459 */
3460
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003461static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003462xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003463 register const xmlChar *cmp = other;
3464 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003465 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003466
3467 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003468 if (ctxt->instate == XML_PARSER_EOF)
3469 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003470
Daniel Veillard46de64e2002-05-29 08:21:33 +00003471 in = ctxt->input->cur;
3472 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003473 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003474 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003475 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003476 }
William M. Brack76e95df2003-10-18 16:20:14 +00003477 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003478 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003479 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003480 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003481 }
3482 /* failure (or end of input buffer), check with full function */
3483 ret = xmlParseName (ctxt);
Jan Pokornýbb654fe2016-04-13 16:56:07 +02003484 /* strings coming from the dictionary direct compare possible */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003485 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003486 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003487 }
3488 return ret;
3489}
3490
Owen Taylor3473f882001-02-23 17:55:21 +00003491/**
3492 * xmlParseStringName:
3493 * @ctxt: an XML parser context
3494 * @str: a pointer to the string pointer (IN/OUT)
3495 *
3496 * parse an XML name.
3497 *
3498 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3499 * CombiningChar | Extender
3500 *
3501 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3502 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003503 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003504 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003505 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003506 * is updated to the current location in the string.
3507 */
3508
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003509static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003510xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3511 xmlChar buf[XML_MAX_NAMELEN + 5];
3512 const xmlChar *cur = *str;
3513 int len = 0, l;
3514 int c;
3515
Daniel Veillardc6561462009-03-25 10:22:31 +00003516#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003517 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003518#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003519
Owen Taylor3473f882001-02-23 17:55:21 +00003520 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003521 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003522 return(NULL);
3523 }
3524
Daniel Veillard34e3f642008-07-29 09:02:27 +00003525 COPY_BUF(l,buf,len,c);
3526 cur += l;
3527 c = CUR_SCHAR(cur, l);
3528 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003529 COPY_BUF(l,buf,len,c);
3530 cur += l;
3531 c = CUR_SCHAR(cur, l);
3532 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3533 /*
3534 * Okay someone managed to make a huge name, so he's ready to pay
3535 * for the processing speed.
3536 */
3537 xmlChar *buffer;
3538 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003539
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003540 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003541 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003542 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003543 return(NULL);
3544 }
3545 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003546 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003547 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003548 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003549
3550 if ((len > XML_MAX_NAME_LENGTH) &&
3551 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3552 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3553 xmlFree(buffer);
3554 return(NULL);
3555 }
Owen Taylor3473f882001-02-23 17:55:21 +00003556 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003557 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003558 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003559 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003560 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003561 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003562 return(NULL);
3563 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003564 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003565 }
3566 COPY_BUF(l,buffer,len,c);
3567 cur += l;
3568 c = CUR_SCHAR(cur, l);
3569 }
3570 buffer[len] = 0;
3571 *str = cur;
3572 return(buffer);
3573 }
3574 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003575 if ((len > XML_MAX_NAME_LENGTH) &&
3576 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3577 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3578 return(NULL);
3579 }
Owen Taylor3473f882001-02-23 17:55:21 +00003580 *str = cur;
3581 return(xmlStrndup(buf, len));
3582}
3583
3584/**
3585 * xmlParseNmtoken:
3586 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003587 *
Owen Taylor3473f882001-02-23 17:55:21 +00003588 * parse an XML Nmtoken.
3589 *
3590 * [7] Nmtoken ::= (NameChar)+
3591 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003592 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003593 *
3594 * Returns the Nmtoken parsed or NULL
3595 */
3596
3597xmlChar *
3598xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3599 xmlChar buf[XML_MAX_NAMELEN + 5];
3600 int len = 0, l;
3601 int c;
3602 int count = 0;
3603
Daniel Veillardc6561462009-03-25 10:22:31 +00003604#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003605 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003606#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003607
Owen Taylor3473f882001-02-23 17:55:21 +00003608 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003609 if (ctxt->instate == XML_PARSER_EOF)
3610 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003611 c = CUR_CHAR(l);
3612
Daniel Veillard34e3f642008-07-29 09:02:27 +00003613 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003614 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003615 count = 0;
3616 GROW;
3617 }
3618 COPY_BUF(l,buf,len,c);
3619 NEXTL(l);
3620 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003621 if (c == 0) {
3622 count = 0;
3623 GROW;
3624 if (ctxt->instate == XML_PARSER_EOF)
3625 return(NULL);
3626 c = CUR_CHAR(l);
3627 }
Owen Taylor3473f882001-02-23 17:55:21 +00003628 if (len >= XML_MAX_NAMELEN) {
3629 /*
3630 * Okay someone managed to make a huge token, so he's ready to pay
3631 * for the processing speed.
3632 */
3633 xmlChar *buffer;
3634 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003635
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003636 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003637 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003638 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003639 return(NULL);
3640 }
3641 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003642 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003643 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003644 count = 0;
3645 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003646 if (ctxt->instate == XML_PARSER_EOF) {
3647 xmlFree(buffer);
3648 return(NULL);
3649 }
Owen Taylor3473f882001-02-23 17:55:21 +00003650 }
3651 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003652 xmlChar *tmp;
3653
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003654 if ((max > XML_MAX_NAME_LENGTH) &&
3655 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3656 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3657 xmlFree(buffer);
3658 return(NULL);
3659 }
Owen Taylor3473f882001-02-23 17:55:21 +00003660 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003661 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003662 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003663 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003664 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003665 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003666 return(NULL);
3667 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003668 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003669 }
3670 COPY_BUF(l,buffer,len,c);
3671 NEXTL(l);
3672 c = CUR_CHAR(l);
3673 }
3674 buffer[len] = 0;
3675 return(buffer);
3676 }
3677 }
3678 if (len == 0)
3679 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003680 if ((len > XML_MAX_NAME_LENGTH) &&
3681 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3682 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3683 return(NULL);
3684 }
Owen Taylor3473f882001-02-23 17:55:21 +00003685 return(xmlStrndup(buf, len));
3686}
3687
3688/**
3689 * xmlParseEntityValue:
3690 * @ctxt: an XML parser context
3691 * @orig: if non-NULL store a copy of the original entity value
3692 *
3693 * parse a value for ENTITY declarations
3694 *
3695 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3696 * "'" ([^%&'] | PEReference | Reference)* "'"
3697 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003698 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003699 */
3700
3701xmlChar *
3702xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3703 xmlChar *buf = NULL;
3704 int len = 0;
3705 int size = XML_PARSER_BUFFER_SIZE;
3706 int c, l;
3707 xmlChar stop;
3708 xmlChar *ret = NULL;
3709 const xmlChar *cur = NULL;
3710 xmlParserInputPtr input;
3711
3712 if (RAW == '"') stop = '"';
3713 else if (RAW == '\'') stop = '\'';
3714 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003715 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003716 return(NULL);
3717 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003718 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003719 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003720 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003721 return(NULL);
3722 }
3723
3724 /*
3725 * The content of the entity definition is copied in a buffer.
3726 */
3727
3728 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3729 input = ctxt->input;
3730 GROW;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003731 if (ctxt->instate == XML_PARSER_EOF)
3732 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003733 NEXT;
3734 c = CUR_CHAR(l);
3735 /*
3736 * NOTE: 4.4.5 Included in Literal
3737 * When a parameter entity reference appears in a literal entity
3738 * value, ... a single or double quote character in the replacement
3739 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003740 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003741 * In practice it means we stop the loop only when back at parsing
3742 * the initial entity and the quote is found
3743 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003744 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3745 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003746 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003747 xmlChar *tmp;
3748
Owen Taylor3473f882001-02-23 17:55:21 +00003749 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003750 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3751 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003752 xmlErrMemory(ctxt, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003753 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003754 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003755 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003756 }
3757 COPY_BUF(l,buf,len,c);
3758 NEXTL(l);
Owen Taylor3473f882001-02-23 17:55:21 +00003759
3760 GROW;
3761 c = CUR_CHAR(l);
3762 if (c == 0) {
3763 GROW;
3764 c = CUR_CHAR(l);
3765 }
3766 }
3767 buf[len] = 0;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003768 if (ctxt->instate == XML_PARSER_EOF)
3769 goto error;
3770 if (c != stop) {
3771 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3772 goto error;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003773 }
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003774 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00003775
3776 /*
3777 * Raise problem w.r.t. '&' and '%' being used in non-entities
3778 * reference constructs. Note Charref will be handled in
3779 * xmlStringDecodeEntities()
3780 */
3781 cur = buf;
3782 while (*cur != 0) { /* non input consuming */
3783 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3784 xmlChar *name;
3785 xmlChar tmp = *cur;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003786 int nameOk = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003787
3788 cur++;
3789 name = xmlParseStringName(ctxt, &cur);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003790 if (name != NULL) {
3791 nameOk = 1;
3792 xmlFree(name);
3793 }
3794 if ((nameOk == 0) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003795 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003796 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003797 tmp);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003798 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003799 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003800 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3801 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003802 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003803 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003804 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003805 if (*cur == 0)
3806 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003807 }
3808 cur++;
3809 }
3810
3811 /*
3812 * Then PEReference entities are substituted.
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003813 *
3814 * NOTE: 4.4.7 Bypassed
3815 * When a general entity reference appears in the EntityValue in
3816 * an entity declaration, it is bypassed and left as is.
3817 * so XML_SUBSTITUTE_REF is not set here.
Owen Taylor3473f882001-02-23 17:55:21 +00003818 */
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003819 ++ctxt->depth;
3820 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3821 0, 0, 0);
3822 --ctxt->depth;
3823 if (orig != NULL) {
3824 *orig = buf;
3825 buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003826 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003827
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003828error:
3829 if (buf != NULL)
3830 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003831 return(ret);
3832}
3833
3834/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003835 * xmlParseAttValueComplex:
3836 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003837 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003838 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003839 *
3840 * parse a value for an attribute, this is the fallback function
3841 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003842 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003843 *
3844 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3845 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003846static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003847xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003848 xmlChar limit = 0;
3849 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003850 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003851 size_t len = 0;
3852 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003853 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003854 xmlChar *current = NULL;
3855 xmlEntityPtr ent;
3856
Owen Taylor3473f882001-02-23 17:55:21 +00003857 if (NXT(0) == '"') {
3858 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3859 limit = '"';
3860 NEXT;
3861 } else if (NXT(0) == '\'') {
3862 limit = '\'';
3863 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3864 NEXT;
3865 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003866 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003867 return(NULL);
3868 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003869
Owen Taylor3473f882001-02-23 17:55:21 +00003870 /*
3871 * allocate a translation buffer.
3872 */
3873 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003874 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003875 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003876
3877 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003878 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003879 */
3880 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003881 while (((NXT(0) != limit) && /* checked */
3882 (IS_CHAR(c)) && (c != '<')) &&
3883 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003884 /*
3885 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3886 * special option is given
3887 */
3888 if ((len > XML_MAX_TEXT_LENGTH) &&
3889 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3890 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003891 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003892 goto mem_error;
3893 }
Owen Taylor3473f882001-02-23 17:55:21 +00003894 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003895 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003896 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003897 if (NXT(1) == '#') {
3898 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003899
Owen Taylor3473f882001-02-23 17:55:21 +00003900 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003901 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003902 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003903 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003904 }
3905 buf[len++] = '&';
3906 } else {
3907 /*
3908 * The reparsing will be done in xmlStringGetNodeList()
3909 * called by the attribute() function in SAX.c
3910 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003911 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003912 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003913 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003914 buf[len++] = '&';
3915 buf[len++] = '#';
3916 buf[len++] = '3';
3917 buf[len++] = '8';
3918 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003919 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003920 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003921 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003922 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003923 }
Owen Taylor3473f882001-02-23 17:55:21 +00003924 len += xmlCopyChar(0, &buf[len], val);
3925 }
3926 } else {
3927 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003928 ctxt->nbentities++;
3929 if (ent != NULL)
3930 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003931 if ((ent != NULL) &&
3932 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003933 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003934 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003935 }
3936 if ((ctxt->replaceEntities == 0) &&
3937 (ent->content[0] == '&')) {
3938 buf[len++] = '&';
3939 buf[len++] = '#';
3940 buf[len++] = '3';
3941 buf[len++] = '8';
3942 buf[len++] = ';';
3943 } else {
3944 buf[len++] = ent->content[0];
3945 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003946 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003947 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003948 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
Peter Simons8f30bdf2016-04-15 11:56:55 +02003949 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003950 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003951 XML_SUBSTITUTE_REF,
3952 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02003953 --ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003954 if (rep != NULL) {
3955 current = rep;
3956 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003957 if ((*current == 0xD) || (*current == 0xA) ||
3958 (*current == 0x9)) {
3959 buf[len++] = 0x20;
3960 current++;
3961 } else
3962 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003963 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003964 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003965 }
3966 }
3967 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003968 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003969 }
3970 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003971 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003972 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003973 }
Owen Taylor3473f882001-02-23 17:55:21 +00003974 if (ent->content != NULL)
3975 buf[len++] = ent->content[0];
3976 }
3977 } else if (ent != NULL) {
3978 int i = xmlStrlen(ent->name);
3979 const xmlChar *cur = ent->name;
3980
3981 /*
3982 * This may look absurd but is needed to detect
3983 * entities problems
3984 */
3985 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08003986 (ent->content != NULL) && (ent->checked == 0)) {
3987 unsigned long oldnbent = ctxt->nbentities;
3988
Peter Simons8f30bdf2016-04-15 11:56:55 +02003989 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003990 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003991 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02003992 --ctxt->depth;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08003993
Daniel Veillardcff25462013-03-11 15:57:55 +08003994 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003995 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08003996 if (xmlStrchr(rep, '<'))
3997 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003998 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003999 rep = NULL;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02004000 } else {
4001 ent->content[0] = 0;
4002 }
Owen Taylor3473f882001-02-23 17:55:21 +00004003 }
4004
4005 /*
4006 * Just output the reference
4007 */
4008 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004009 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004010 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004011 }
4012 for (;i > 0;i--)
4013 buf[len++] = *cur++;
4014 buf[len++] = ';';
4015 }
4016 }
4017 } else {
4018 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004019 if ((len != 0) || (!normalize)) {
4020 if ((!normalize) || (!in_space)) {
4021 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004022 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004023 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004024 }
4025 }
4026 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004027 }
4028 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004029 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004030 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004031 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004032 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004033 }
4034 }
4035 NEXTL(l);
4036 }
4037 GROW;
4038 c = CUR_CHAR(l);
4039 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004040 if (ctxt->instate == XML_PARSER_EOF)
4041 goto error;
4042
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004043 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004044 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004045 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004046 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004047 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004048 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004049 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004050 if ((c != 0) && (!IS_CHAR(c))) {
4051 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4052 "invalid character in attribute value\n");
4053 } else {
4054 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4055 "AttValue: ' expected\n");
4056 }
Owen Taylor3473f882001-02-23 17:55:21 +00004057 } else
4058 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004059
4060 /*
4061 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004062 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004063 */
4064 if (len >= INT_MAX) {
4065 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004066 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004067 goto mem_error;
4068 }
4069
4070 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004071 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004072
4073mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004074 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004075error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004076 if (buf != NULL)
4077 xmlFree(buf);
4078 if (rep != NULL)
4079 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004080 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004081}
4082
4083/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004084 * xmlParseAttValue:
4085 * @ctxt: an XML parser context
4086 *
4087 * parse a value for an attribute
4088 * Note: the parser won't do substitution of entities here, this
4089 * will be handled later in xmlStringGetNodeList
4090 *
4091 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4092 * "'" ([^<&'] | Reference)* "'"
4093 *
4094 * 3.3.3 Attribute-Value Normalization:
4095 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004096 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004097 * - a character reference is processed by appending the referenced
4098 * character to the attribute value
4099 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004100 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004101 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4102 * appending #x20 to the normalized value, except that only a single
4103 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004104 * parsed entity or the literal entity value of an internal parsed entity
4105 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004106 * If the declared value is not CDATA, then the XML processor must further
4107 * process the normalized attribute value by discarding any leading and
4108 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004109 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004110 * All attributes for which no declaration has been read should be treated
4111 * by a non-validating parser as if declared CDATA.
4112 *
4113 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4114 */
4115
4116
4117xmlChar *
4118xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004119 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004120 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004121}
4122
4123/**
Owen Taylor3473f882001-02-23 17:55:21 +00004124 * xmlParseSystemLiteral:
4125 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004126 *
Owen Taylor3473f882001-02-23 17:55:21 +00004127 * parse an XML Literal
4128 *
4129 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4130 *
4131 * Returns the SystemLiteral parsed or NULL
4132 */
4133
4134xmlChar *
4135xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4136 xmlChar *buf = NULL;
4137 int len = 0;
4138 int size = XML_PARSER_BUFFER_SIZE;
4139 int cur, l;
4140 xmlChar stop;
4141 int state = ctxt->instate;
4142 int count = 0;
4143
4144 SHRINK;
4145 if (RAW == '"') {
4146 NEXT;
4147 stop = '"';
4148 } else if (RAW == '\'') {
4149 NEXT;
4150 stop = '\'';
4151 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004152 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004153 return(NULL);
4154 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004155
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004156 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004157 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004158 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004159 return(NULL);
4160 }
4161 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4162 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004163 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004164 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004165 xmlChar *tmp;
4166
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004167 if ((size > XML_MAX_NAME_LENGTH) &&
4168 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4169 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4170 xmlFree(buf);
4171 ctxt->instate = (xmlParserInputState) state;
4172 return(NULL);
4173 }
Owen Taylor3473f882001-02-23 17:55:21 +00004174 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004175 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4176 if (tmp == NULL) {
4177 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004178 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004179 ctxt->instate = (xmlParserInputState) state;
4180 return(NULL);
4181 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004182 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004183 }
4184 count++;
4185 if (count > 50) {
4186 GROW;
4187 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004188 if (ctxt->instate == XML_PARSER_EOF) {
4189 xmlFree(buf);
4190 return(NULL);
4191 }
Owen Taylor3473f882001-02-23 17:55:21 +00004192 }
4193 COPY_BUF(l,buf,len,cur);
4194 NEXTL(l);
4195 cur = CUR_CHAR(l);
4196 if (cur == 0) {
4197 GROW;
4198 SHRINK;
4199 cur = CUR_CHAR(l);
4200 }
4201 }
4202 buf[len] = 0;
4203 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004204 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004205 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004206 } else {
4207 NEXT;
4208 }
4209 return(buf);
4210}
4211
4212/**
4213 * xmlParsePubidLiteral:
4214 * @ctxt: an XML parser context
4215 *
4216 * parse an XML public literal
4217 *
4218 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4219 *
4220 * Returns the PubidLiteral parsed or NULL.
4221 */
4222
4223xmlChar *
4224xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4225 xmlChar *buf = NULL;
4226 int len = 0;
4227 int size = XML_PARSER_BUFFER_SIZE;
4228 xmlChar cur;
4229 xmlChar stop;
4230 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004231 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004232
4233 SHRINK;
4234 if (RAW == '"') {
4235 NEXT;
4236 stop = '"';
4237 } else if (RAW == '\'') {
4238 NEXT;
4239 stop = '\'';
4240 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004241 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004242 return(NULL);
4243 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004244 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004245 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004246 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004247 return(NULL);
4248 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004249 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004250 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004251 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004252 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004253 xmlChar *tmp;
4254
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004255 if ((size > XML_MAX_NAME_LENGTH) &&
4256 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4257 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4258 xmlFree(buf);
4259 return(NULL);
4260 }
Owen Taylor3473f882001-02-23 17:55:21 +00004261 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004262 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4263 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004264 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004265 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004266 return(NULL);
4267 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004268 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004269 }
4270 buf[len++] = cur;
4271 count++;
4272 if (count > 50) {
4273 GROW;
4274 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004275 if (ctxt->instate == XML_PARSER_EOF) {
4276 xmlFree(buf);
4277 return(NULL);
4278 }
Owen Taylor3473f882001-02-23 17:55:21 +00004279 }
4280 NEXT;
4281 cur = CUR;
4282 if (cur == 0) {
4283 GROW;
4284 SHRINK;
4285 cur = CUR;
4286 }
4287 }
4288 buf[len] = 0;
4289 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004290 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004291 } else {
4292 NEXT;
4293 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004294 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004295 return(buf);
4296}
4297
Daniel Veillard8ed10722009-08-20 19:17:36 +02004298static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004299
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing, indexed by byte value. An entry is either the byte itself or
 * 0x00. The entries holding their own value are 0x09 and the range
 * 0x20-0x7F minus '&' (0x26), '<' (0x3C) and ']' (0x5D); every other
 * entry, including all bytes from 0x80 up, is 0x00.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* tab only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 and above: non-ascii, never accepted */
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x88 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x98 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4337
Owen Taylor3473f882001-02-23 17:55:21 +00004338/**
4339 * xmlParseCharData:
4340 * @ctxt: an XML parser context
4341 * @cdata: int indicating whether we are within a CDATA section
4342 *
4343 * parse a CharData section.
4344 * if we are within a CDATA section ']]>' marks an end of section.
4345 *
4346 * The right angle bracket (>) may be represented using the string "&gt;",
4347 * and must, for compatibility, be escaped using "&gt;" or a character
4348 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004349 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004350 *
4351 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4352 */
4353
/*
 * Fast-path entry point for character data. When not inside a CDATA section
 * it scans runs of ASCII bytes in place and hands slices of the input buffer
 * directly to the SAX callbacks; whatever it does not handle itself is passed
 * on to xmlParseCharDataComplex().
 */
4354void
4355xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004356 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004357 int nbchar = 0;
/* line/col snapshot: refreshed after each chunk handed to SAX and written
 * back to the input just before the slow path is called. */
Daniel Veillard50582112001-03-26 22:52:16 +00004358 int line = ctxt->input->line;
4359 int col = ctxt->input->col;
/* Local column counter used while scanning a run of plain characters. */
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004360 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004361
4362 SHRINK;
4363 GROW;
4364 /*
4365 * Accelerated common case where input don't need to be
4366 * modified before passing it to the handler.
4367 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004368 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004369 in = ctxt->input->cur;
4370 do {
/* Skip a run of spaces (0x20) and line feeds (0xA), keeping line/col
 * up to date as we go. */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004371get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004372 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004373 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004374 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004375 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004376 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004377 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004378 goto get_more_space;
4379 }
/* The run skipped above is directly followed by '<': deliver it as one
 * chunk. When the two callbacks differ, areBlanks() (called with 1 as its
 * last argument) chooses between ignorableWhitespace and characters. */
4380 if (*in == '<') {
4381 nbchar = in - ctxt->input->cur;
4382 if (nbchar > 0) {
4383 const xmlChar *tmp = ctxt->input->cur;
4384 ctxt->input->cur = in;
4385
Daniel Veillard34099b42004-11-04 17:34:35 +00004386 if ((ctxt->sax != NULL) &&
4387 (ctxt->sax->ignorableWhitespace !=
4388 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004389 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004390 if (ctxt->sax->ignorableWhitespace != NULL)
4391 ctxt->sax->ignorableWhitespace(ctxt->userData,
4392 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004393 } else {
4394 if (ctxt->sax->characters != NULL)
4395 ctxt->sax->characters(ctxt->userData,
4396 tmp, nbchar);
4397 if (*ctxt->space == -1)
4398 *ctxt->space = -2;
4399 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004400 } else if ((ctxt->sax != NULL) &&
4401 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004402 ctxt->sax->characters(ctxt->userData,
4403 tmp, nbchar);
4404 }
4405 }
4406 return;
4407 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004408
/* Consume every byte whose test_char_data[] entry is non-zero, counting
 * columns in ccol and storing the result once the run ends.
 * NOTE(review): the table presumably holds zero for the bytes tested
 * explicitly below (0xA, ']', 0xD, '<', '&'); only its tail is visible
 * from here - confirm against the full table. */
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004409get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004410 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004411 while (test_char_data[*in]) {
4412 in++;
4413 ccol++;
4414 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004415 ctxt->input->col = ccol;
/* Line feeds are part of the data: count the lines and resume scanning. */
Daniel Veillard561b7f82002-03-20 21:55:57 +00004416 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004417 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004418 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004419 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004420 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004421 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004422 }
/* A ']' is ordinary data unless it starts "]]>", which is reported as a
 * misplaced CDATA end; in that case the current position is set just past
 * the first ']' and the function returns without delivering the pending
 * bytes. */
4423 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004424 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004425 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Nick Wellnhofer52ceced2017-07-01 17:49:30 +02004426 ctxt->input->cur = in + 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004427 return;
4428 }
4429 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004430 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004431 goto get_more;
4432 }
/* Deliver the bytes scanned since ctxt->input->cur, if any. areBlanks() is
 * only consulted when the two callbacks differ and the chunk starts with a
 * blank character. */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004433 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004434 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004435 if ((ctxt->sax != NULL) &&
4436 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004437 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004438 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004439 const xmlChar *tmp = ctxt->input->cur;
4440 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004441
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004442 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004443 if (ctxt->sax->ignorableWhitespace != NULL)
4444 ctxt->sax->ignorableWhitespace(ctxt->userData,
4445 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004446 } else {
4447 if (ctxt->sax->characters != NULL)
4448 ctxt->sax->characters(ctxt->userData,
4449 tmp, nbchar);
4450 if (*ctxt->space == -1)
4451 *ctxt->space = -2;
4452 }
/* Remember the position reached by the data delivered so far. */
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004453 line = ctxt->input->line;
4454 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004455 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004456 if (ctxt->sax->characters != NULL)
4457 ctxt->sax->characters(ctxt->userData,
4458 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004459 line = ctxt->input->line;
4460 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004461 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004462 /* something really bad happened in the SAX callback */
4463 if (ctxt->instate != XML_PARSER_CONTENT)
4464 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004465 }
4466 ctxt->input->cur = in;
/* CR LF pair: the current position is moved onto the LF, so the CR is left
 * out of the next chunk, then the line is counted and scanning resumes at
 * the loop condition. */
Daniel Veillard561b7f82002-03-20 21:55:57 +00004467 if (*in == 0xD) {
4468 in++;
4469 if (*in == 0xA) {
4470 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004471 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004472 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004473 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004474 }
/* Lone CR: step back onto it; it fails the loop condition below, so it
 * ends up being handled by xmlParseCharDataComplex(). */
Daniel Veillard561b7f82002-03-20 21:55:57 +00004475 in--;
4476 }
/* Start of markup or of a reference: character data stops here. */
4477 if (*in == '<') {
4478 return;
4479 }
4480 if (*in == '&') {
4481 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004482 }
/* Refresh the input window, then re-read the scan pointer from the input
 * since ctxt->input->cur is the reference position after SHRINK/GROW. */
4483 SHRINK;
4484 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004485 if (ctxt->instate == XML_PARSER_EOF)
4486 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004487 in = ctxt->input->cur;
/* Stay on the fast path only while the next byte is in 0x20..0x7F or is a
 * tab. */
William M. Brackc07329e2003-09-08 01:57:30 +00004488 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004489 nbchar = 0;
4490 }
/* Write back the line/col recorded at entry or after the last delivered
 * chunk, then let the generic routine continue from ctxt->input->cur. */
Daniel Veillard50582112001-03-26 22:52:16 +00004491 ctxt->input->line = line;
4492 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004493 xmlParseCharDataComplex(ctxt, cdata);
4494}
4495
Daniel Veillard01c13b52002-12-10 15:19:08 +00004496/**
4497 * xmlParseCharDataComplex:
4498 * @ctxt: an XML parser context
4499 * @cdata: int indicating whether we are within a CDATA section
4500 *
4501 * parse a CharData section.this is the fallback function
4502 * of xmlParseCharData() when the parsing requires handling
4503 * of non-ASCII characters.
4504 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004505static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004506xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004507 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4508 int nbchar = 0;
4509 int cur, l;
4510 int count = 0;
4511
4512 SHRINK;
4513 GROW;
4514 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004515 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004516 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004517 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004518 if ((cur == ']') && (NXT(1) == ']') &&
4519 (NXT(2) == '>')) {
4520 if (cdata) break;
4521 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004522 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004523 }
4524 }
4525 COPY_BUF(l,buf,nbchar,cur);
4526 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004527 buf[nbchar] = 0;
4528
Owen Taylor3473f882001-02-23 17:55:21 +00004529 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004530 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004531 */
4532 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004533 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004534 if (ctxt->sax->ignorableWhitespace != NULL)
4535 ctxt->sax->ignorableWhitespace(ctxt->userData,
4536 buf, nbchar);
4537 } else {
4538 if (ctxt->sax->characters != NULL)
4539 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004540 if ((ctxt->sax->characters !=
4541 ctxt->sax->ignorableWhitespace) &&
4542 (*ctxt->space == -1))
4543 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004544 }
4545 }
4546 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004547 /* something really bad happened in the SAX callback */
4548 if (ctxt->instate != XML_PARSER_CONTENT)
4549 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004550 }
4551 count++;
4552 if (count > 50) {
4553 GROW;
4554 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004555 if (ctxt->instate == XML_PARSER_EOF)
4556 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004557 }
4558 NEXTL(l);
4559 cur = CUR_CHAR(l);
4560 }
4561 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004562 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004563 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004564 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004565 */
4566 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004567 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004568 if (ctxt->sax->ignorableWhitespace != NULL)
4569 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4570 } else {
4571 if (ctxt->sax->characters != NULL)
4572 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004573 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4574 (*ctxt->space == -1))
4575 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004576 }
4577 }
4578 }
Nick Wellnhofer69936b12017-08-30 14:16:01 +02004579 if ((cur != 0) && (!IS_CHAR(cur))) {
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004580 /* Generate the error and skip the offending character */
4581 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4582 "PCDATA invalid Char value %d\n",
4583 cur);
4584 NEXTL(l);
4585 }
Owen Taylor3473f882001-02-23 17:55:21 +00004586}
4587
4588/**
4589 * xmlParseExternalID:
4590 * @ctxt: an XML parser context
4591 * @publicID: a xmlChar** receiving PubidLiteral
4592 * @strict: indicate whether we should restrict parsing to only
4593 * production [75], see NOTE below
4594 *
4595 * Parse an External ID or a Public ID
4596 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004597 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004598 * 'PUBLIC' S PubidLiteral S SystemLiteral
4599 *
4600 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4601 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4602 *
4603 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4604 *
4605 * Returns the function returns SystemLiteral and in the second
4606 * case publicID receives PubidLiteral, is strict is off
4607 * it is possible to return NULL and have publicID set.
4608 */
4609
4610xmlChar *
4611xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4612 xmlChar *URI = NULL;
4613
4614 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004615
4616 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004617 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004618 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004619 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004620 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4621 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004622 }
Owen Taylor3473f882001-02-23 17:55:21 +00004623 URI = xmlParseSystemLiteral(ctxt);
4624 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004625 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004626 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004627 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004628 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004629 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004630 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004631 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004632 }
Owen Taylor3473f882001-02-23 17:55:21 +00004633 *publicID = xmlParsePubidLiteral(ctxt);
4634 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004635 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004636 }
4637 if (strict) {
4638 /*
4639 * We don't handle [83] so "S SystemLiteral" is required.
4640 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004641 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004642 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004643 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004644 }
4645 } else {
4646 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004647 * We handle [83] so we return immediately, if
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004648 * "S SystemLiteral" is not detected. We skip blanks if no
4649 * system literal was found, but this is harmless since we must
4650 * be at the end of a NotationDecl.
Owen Taylor3473f882001-02-23 17:55:21 +00004651 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004652 if (SKIP_BLANKS == 0) return(NULL);
4653 if ((CUR != '\'') && (CUR != '"')) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004654 }
Owen Taylor3473f882001-02-23 17:55:21 +00004655 URI = xmlParseSystemLiteral(ctxt);
4656 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004657 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004658 }
4659 }
4660 return(URI);
4661}
4662
4663/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004664 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004665 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004666 * @buf: the already parsed part of the buffer
4667 * @len: number of bytes filles in the buffer
4668 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004669 *
4670 * Skip an XML (SGML) comment <!-- .... -->
4671 * The spec says that "For compatibility, the string "--" (double-hyphen)
4672 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004673 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004674 *
4675 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4676 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004677static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004678xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4679 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004680 int q, ql;
4681 int r, rl;
4682 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004683 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004684 int inputid;
4685
4686 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004687
Owen Taylor3473f882001-02-23 17:55:21 +00004688 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004689 len = 0;
4690 size = XML_PARSER_BUFFER_SIZE;
4691 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4692 if (buf == NULL) {
4693 xmlErrMemory(ctxt, NULL);
4694 return;
4695 }
Owen Taylor3473f882001-02-23 17:55:21 +00004696 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004697 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004698 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004699 if (q == 0)
4700 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004701 if (!IS_CHAR(q)) {
4702 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4703 "xmlParseComment: invalid xmlChar value %d\n",
4704 q);
4705 xmlFree (buf);
4706 return;
4707 }
Owen Taylor3473f882001-02-23 17:55:21 +00004708 NEXTL(ql);
4709 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004710 if (r == 0)
4711 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004712 if (!IS_CHAR(r)) {
4713 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4714 "xmlParseComment: invalid xmlChar value %d\n",
4715 q);
4716 xmlFree (buf);
4717 return;
4718 }
Owen Taylor3473f882001-02-23 17:55:21 +00004719 NEXTL(rl);
4720 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004721 if (cur == 0)
4722 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004723 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004724 ((cur != '>') ||
4725 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004726 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004727 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004728 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004729 if ((len > XML_MAX_TEXT_LENGTH) &&
4730 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4731 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4732 "Comment too big found", NULL);
4733 xmlFree (buf);
4734 return;
4735 }
Owen Taylor3473f882001-02-23 17:55:21 +00004736 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004737 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004738 size_t new_size;
4739
4740 new_size = size * 2;
4741 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004742 if (new_buf == NULL) {
4743 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004744 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004745 return;
4746 }
William M. Bracka3215c72004-07-31 16:24:01 +00004747 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004748 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004749 }
4750 COPY_BUF(ql,buf,len,q);
4751 q = r;
4752 ql = rl;
4753 r = cur;
4754 rl = l;
4755
4756 count++;
4757 if (count > 50) {
4758 GROW;
4759 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004760 if (ctxt->instate == XML_PARSER_EOF) {
4761 xmlFree(buf);
4762 return;
4763 }
Owen Taylor3473f882001-02-23 17:55:21 +00004764 }
4765 NEXTL(l);
4766 cur = CUR_CHAR(l);
4767 if (cur == 0) {
4768 SHRINK;
4769 GROW;
4770 cur = CUR_CHAR(l);
4771 }
4772 }
4773 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004774 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004775 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004776 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004777 } else if (!IS_CHAR(cur)) {
4778 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4779 "xmlParseComment: invalid xmlChar value %d\n",
4780 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004781 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004782 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004783 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004784 "Comment doesn't start and stop in the same"
4785 " entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004786 }
4787 NEXT;
4788 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4789 (!ctxt->disableSAX))
4790 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004791 }
Daniel Veillardda629342007-08-01 07:49:06 +00004792 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004793 return;
4794not_terminated:
4795 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4796 "Comment not terminated\n", NULL);
4797 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004798 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004799}
Daniel Veillardda629342007-08-01 07:49:06 +00004800
Daniel Veillard4c778d82005-01-23 17:37:44 +00004801/**
4802 * xmlParseComment:
4803 * @ctxt: an XML parser context
4804 *
4805 * Skip an XML (SGML) comment <!-- .... -->
4806 * The spec says that "For compatibility, the string "--" (double-hyphen)
4807 * must not occur within comments. "
4808 *
4809 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4810 */
/*
 * Parse a comment starting at "<!--". An in-place fast path accumulates
 * runs of ASCII bytes into a growing buffer and reports the comment through
 * the SAX comment callback; when it meets a byte it does not handle it
 * passes the partial buffer to xmlParseCommentComplex().
 */
4811void
4812xmlParseComment(xmlParserCtxtPtr ctxt) {
4813 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004814 size_t size = XML_PARSER_BUFFER_SIZE;
4815 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004816 xmlParserInputState state;
4817 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004818 size_t nbchar = 0;
4819 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004820 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004821
4822 /*
4823 * Check that there is a comment right here.
4824 */
4825 if ((RAW != '<') || (NXT(1) != '!') ||
4826 (NXT(2) != '-') || (NXT(3) != '-')) return;
/* Remember the caller's state and the input the comment starts in. */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004827 state = ctxt->instate;
4828 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004829 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004830 SKIP(4);
4831 SHRINK;
4832 GROW;
4833
4834 /*
4835 * Accelerated common case where input don't need to be
4836 * modified before passing it to the handler.
4837 */
4838 in = ctxt->input->cur;
4839 do {
4840 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004841 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004842 ctxt->input->line++; ctxt->input->col = 1;
4843 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004844 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004845 }
/* Scan a run of bytes in 0x20..0x7F other than '-', plus tabs, counting
 * columns locally; line feeds are counted and the scan resumes. */
4846get_more:
4847 ccol = ctxt->input->col;
4848 while (((*in > '-') && (*in <= 0x7F)) ||
4849 ((*in >= 0x20) && (*in < '-')) ||
4850 (*in == 0x09)) {
4851 in++;
4852 ccol++;
4853 }
4854 ctxt->input->col = ccol;
4855 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004856 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004857 ctxt->input->line++; ctxt->input->col = 1;
4858 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004859 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004860 goto get_more;
4861 }
4862 nbchar = in - ctxt->input->cur;
4863 /*
4864 * save current set of data
4865 */
/* The bytes are only accumulated when a SAX comment callback exists. */
4866 if (nbchar > 0) {
4867 if ((ctxt->sax != NULL) &&
4868 (ctxt->sax->comment != NULL)) {
4869 if (buf == NULL) {
/* First chunk: when it is directly followed by "--" the buffer is sized
 * to hold just this chunk plus the terminator; otherwise extra room is
 * reserved. */
4870 if ((*in == '-') && (in[1] == '-'))
4871 size = nbchar + 1;
4872 else
4873 size = XML_PARSER_BUFFER_SIZE + nbchar;
4874 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4875 if (buf == NULL) {
4876 xmlErrMemory(ctxt, NULL);
4877 ctxt->instate = state;
4878 return;
4879 }
4880 len = 0;
4881 } else if (len + nbchar + 1 >= size) {
4882 xmlChar *new_buf;
4883 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4884 new_buf = (xmlChar *) xmlRealloc(buf,
4885 size * sizeof(xmlChar));
4886 if (new_buf == NULL) {
4887 xmlFree (buf);
4888 xmlErrMemory(ctxt, NULL);
4889 ctxt->instate = state;
4890 return;
4891 }
4892 buf = new_buf;
4893 }
4894 memcpy(&buf[len], ctxt->input->cur, nbchar);
4895 len += nbchar;
4896 buf[len] = 0;
4897 }
4898 }
/* Length limit, lifted by XML_PARSE_HUGE.
 * NOTE(review): unlike the allocation failure paths above, this return
 * leaves ctxt->instate set to XML_PARSER_COMMENT - confirm this is
 * intended. */
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004899 if ((len > XML_MAX_TEXT_LENGTH) &&
4900 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4901 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902 "Comment too big found", NULL);
4903 xmlFree (buf);
4904 return;
4905 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004906 ctxt->input->cur = in;
/* NOTE(review): every 0xA was already consumed by the loop before the
 * data was saved, so this test looks unreachable - confirm. */
Daniel Veillard9b528c72006-02-05 03:06:15 +00004907 if (*in == 0xA) {
4908 in++;
4909 ctxt->input->line++; ctxt->input->col = 1;
4910 }
/* CR LF pair: the current position is moved onto the LF, so the CR is left
 * out of the next chunk; a lone CR is stepped back onto and left for the
 * code below. */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004911 if (*in == 0xD) {
4912 in++;
4913 if (*in == 0xA) {
4914 ctxt->input->cur = in;
4915 in++;
4916 ctxt->input->line++; ctxt->input->col = 1;
4917 continue; /* while */
4918 }
4919 in--;
4920 }
4921 SHRINK;
4922 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004923 if (ctxt->instate == XML_PARSER_EOF) {
4924 xmlFree(buf);
4925 return;
4926 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004927 in = ctxt->input->cur;
/* Hyphen handling: "-->" ends the comment, "--" followed by anything else
 * is reported as an error, and in both non-terminating cases the hyphens
 * are stepped over while ctxt->input->cur stays behind them, so they are
 * part of the next chunk copied into the buffer. */
4928 if (*in == '-') {
4929 if (in[1] == '-') {
4930 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004931 if (ctxt->input->id != inputid) {
4932 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004933 "comment doesn't start and stop in the"
4934 " same entity\n");
Daniel Veillard051d52c2008-07-29 16:44:59 +00004935 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004936 SKIP(3);
/* Report the comment; an empty string is passed when nothing was
 * accumulated. */
4937 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4938 (!ctxt->disableSAX)) {
4939 if (buf != NULL)
4940 ctxt->sax->comment(ctxt->userData, buf);
4941 else
4942 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4943 }
4944 if (buf != NULL)
4945 xmlFree(buf);
/* Restore the caller's state unless the parser reached the EOF state. */
Daniel Veillarde50ba812013-04-11 15:54:51 +08004946 if (ctxt->instate != XML_PARSER_EOF)
4947 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004948 return;
4949 }
Bryan Henderson8658d272012-05-08 16:39:05 +08004950 if (buf != NULL) {
4951 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4952 "Double hyphen within comment: "
4953 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00004954 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08004955 } else
4956 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4957 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004958 in++;
4959 ctxt->input->col++;
4960 }
4961 in++;
4962 ctxt->input->col++;
4963 goto get_more;
4964 }
/* Stay on the fast path only while the next byte is in 0x20..0x7F or is a
 * tab. */
4965 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
/* Slow path: it continues from ctxt->input->cur with what was accumulated
 * so far and releases buf itself. */
4966 xmlParseCommentComplex(ctxt, buf, len, size);
4967 ctxt->instate = state;
4968 return;
4969}
4970
Owen Taylor3473f882001-02-23 17:55:21 +00004971
4972/**
4973 * xmlParsePITarget:
4974 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004975 *
Owen Taylor3473f882001-02-23 17:55:21 +00004976 * parse the name of a PI
4977 *
4978 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4979 *
4980 * Returns the PITarget name or NULL
4981 */
4982
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004983const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004984xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004985 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004986
4987 name = xmlParseName(ctxt);
4988 if ((name != NULL) &&
4989 ((name[0] == 'x') || (name[0] == 'X')) &&
4990 ((name[1] == 'm') || (name[1] == 'M')) &&
4991 ((name[2] == 'l') || (name[2] == 'L'))) {
4992 int i;
4993 if ((name[0] == 'x') && (name[1] == 'm') &&
4994 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004995 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004996 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004997 return(name);
4998 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004999 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005000 return(name);
5001 }
5002 for (i = 0;;i++) {
5003 if (xmlW3CPIs[i] == NULL) break;
5004 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5005 return(name);
5006 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005007 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5008 "xmlParsePITarget: invalid name prefix 'xml'\n",
5009 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005010 }
Daniel Veillard37334572008-07-31 08:20:02 +00005011 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005012 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005013 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005014 }
Owen Taylor3473f882001-02-23 17:55:21 +00005015 return(name);
5016}
5017
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005018#ifdef LIBXML_CATALOG_ENABLED
5019/**
5020 * xmlParseCatalogPI:
5021 * @ctxt: an XML parser context
5022 * @catalog: the PI value string
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005023 *
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005024 * parse an XML Catalog Processing Instruction.
5025 *
5026 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5027 *
5028 * Occurs only if allowed by the user and if happening in the Misc
5029 * part of the document before any doctype informations
5030 * This will add the given catalog to the parsing context in order
5031 * to be used if there is a resolution need further down in the document
5032 */
5033
5034static void
5035xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5036 xmlChar *URL = NULL;
5037 const xmlChar *tmp, *base;
5038 xmlChar marker;
5039
5040 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00005041 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005042 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5043 goto error;
5044 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00005045 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005046 if (*tmp != '=') {
5047 return;
5048 }
5049 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00005050 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005051 marker = *tmp;
5052 if ((marker != '\'') && (marker != '"'))
5053 goto error;
5054 tmp++;
5055 base = tmp;
5056 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5057 if (*tmp == 0)
5058 goto error;
5059 URL = xmlStrndup(base, tmp - base);
5060 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00005061 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005062 if (*tmp != 0)
5063 goto error;
5064
5065 if (URL != NULL) {
5066 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5067 xmlFree(URL);
5068 }
5069 return;
5070
5071error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00005072 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5073 "Catalog PI syntax error: %s\n",
5074 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005075 if (URL != NULL)
5076 xmlFree(URL);
5077}
5078#endif
5079
Owen Taylor3473f882001-02-23 17:55:21 +00005080/**
5081 * xmlParsePI:
5082 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005083 *
Owen Taylor3473f882001-02-23 17:55:21 +00005084 * parse an XML Processing Instruction.
5085 *
5086 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5087 *
5088 * The processing is transferred to SAX once parsed.
5089 */
5090
5091void
5092xmlParsePI(xmlParserCtxtPtr ctxt) {
5093 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005094 size_t len = 0;
5095 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005096 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005097 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005098 xmlParserInputState state;
5099 int count = 0;
5100
5101 if ((RAW == '<') && (NXT(1) == '?')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005102 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005103 state = ctxt->instate;
5104 ctxt->instate = XML_PARSER_PI;
5105 /*
5106 * this is a Processing Instruction.
5107 */
5108 SKIP(2);
5109 SHRINK;
5110
5111 /*
5112 * Parse the target name and check for special support like
5113 * namespace.
5114 */
5115 target = xmlParsePITarget(ctxt);
5116 if (target != NULL) {
5117 if ((RAW == '?') && (NXT(1) == '>')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005118 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005119 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005120 "PI declaration doesn't start and stop in"
5121 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005122 }
5123 SKIP(2);
5124
5125 /*
5126 * SAX: PI detected.
5127 */
5128 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5129 (ctxt->sax->processingInstruction != NULL))
5130 ctxt->sax->processingInstruction(ctxt->userData,
5131 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005132 if (ctxt->instate != XML_PARSER_EOF)
5133 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005134 return;
5135 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005136 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005137 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005138 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005139 ctxt->instate = state;
5140 return;
5141 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005142 if (SKIP_BLANKS == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005143 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5144 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005145 }
Owen Taylor3473f882001-02-23 17:55:21 +00005146 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005147 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005148 ((cur != '?') || (NXT(1) != '>'))) {
5149 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005150 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005151 size_t new_size = size * 2;
5152 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005153 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005154 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005155 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005156 ctxt->instate = state;
5157 return;
5158 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005159 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005160 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005161 }
5162 count++;
5163 if (count > 50) {
5164 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005165 if (ctxt->instate == XML_PARSER_EOF) {
5166 xmlFree(buf);
5167 return;
5168 }
Owen Taylor3473f882001-02-23 17:55:21 +00005169 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005170 if ((len > XML_MAX_TEXT_LENGTH) &&
5171 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5172 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5173 "PI %s too big found", target);
5174 xmlFree(buf);
5175 ctxt->instate = state;
5176 return;
5177 }
Owen Taylor3473f882001-02-23 17:55:21 +00005178 }
5179 COPY_BUF(l,buf,len,cur);
5180 NEXTL(l);
5181 cur = CUR_CHAR(l);
5182 if (cur == 0) {
5183 SHRINK;
5184 GROW;
5185 cur = CUR_CHAR(l);
5186 }
5187 }
Daniel Veillard51304812012-07-19 20:34:26 +08005188 if ((len > XML_MAX_TEXT_LENGTH) &&
5189 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5190 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5191 "PI %s too big found", target);
5192 xmlFree(buf);
5193 ctxt->instate = state;
5194 return;
5195 }
Owen Taylor3473f882001-02-23 17:55:21 +00005196 buf[len] = 0;
5197 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005198 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5199 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005200 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005201 if (inputid != ctxt->input->id) {
5202 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5203 "PI declaration doesn't start and stop in"
5204 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005205 }
5206 SKIP(2);
5207
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005208#ifdef LIBXML_CATALOG_ENABLED
5209 if (((state == XML_PARSER_MISC) ||
5210 (state == XML_PARSER_START)) &&
5211 (xmlStrEqual(target, XML_CATALOG_PI))) {
5212 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5213 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5214 (allow == XML_CATA_ALLOW_ALL))
5215 xmlParseCatalogPI(ctxt, buf);
5216 }
5217#endif
5218
5219
Owen Taylor3473f882001-02-23 17:55:21 +00005220 /*
5221 * SAX: PI detected.
5222 */
5223 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5224 (ctxt->sax->processingInstruction != NULL))
5225 ctxt->sax->processingInstruction(ctxt->userData,
5226 target, buf);
5227 }
5228 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005229 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005230 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005231 }
Chris Evans77404b82011-12-14 16:18:25 +08005232 if (ctxt->instate != XML_PARSER_EOF)
5233 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005234 }
5235}
5236
5237/**
5238 * xmlParseNotationDecl:
5239 * @ctxt: an XML parser context
5240 *
5241 * parse a notation declaration
5242 *
5243 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5244 *
5245 * Hence there is actually 3 choices:
5246 * 'PUBLIC' S PubidLiteral
5247 * 'PUBLIC' S PubidLiteral S SystemLiteral
5248 * and 'SYSTEM' S SystemLiteral
5249 *
5250 * See the NOTE on xmlParseExternalID().
5251 */
5252
5253void
5254xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005255 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005256 xmlChar *Pubid;
5257 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005258
Daniel Veillarda07050d2003-10-19 14:46:32 +00005259 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005260 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005261 SHRINK;
5262 SKIP(10);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005263 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005264 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5265 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005266 return;
5267 }
Owen Taylor3473f882001-02-23 17:55:21 +00005268
Daniel Veillard76d66f42001-05-16 21:05:17 +00005269 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005270 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005271 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005272 return;
5273 }
Daniel Veillard37334572008-07-31 08:20:02 +00005274 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005275 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005276 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005277 name, NULL, NULL);
5278 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005279 if (SKIP_BLANKS == 0) {
5280 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5281 "Space required after the NOTATION name'\n");
5282 return;
5283 }
Owen Taylor3473f882001-02-23 17:55:21 +00005284
5285 /*
5286 * Parse the IDs.
5287 */
5288 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5289 SKIP_BLANKS;
5290
5291 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005292 if (inputid != ctxt->input->id) {
5293 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5294 "Notation declaration doesn't start and stop"
5295 " in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005296 }
5297 NEXT;
5298 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5299 (ctxt->sax->notationDecl != NULL))
5300 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5301 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005302 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005303 }
Owen Taylor3473f882001-02-23 17:55:21 +00005304 if (Systemid != NULL) xmlFree(Systemid);
5305 if (Pubid != NULL) xmlFree(Pubid);
5306 }
5307}
5308
5309/**
5310 * xmlParseEntityDecl:
5311 * @ctxt: an XML parser context
5312 *
5313 * parse <!ENTITY declarations
5314 *
5315 * [70] EntityDecl ::= GEDecl | PEDecl
5316 *
5317 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5318 *
5319 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5320 *
5321 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5322 *
5323 * [74] PEDef ::= EntityValue | ExternalID
5324 *
5325 * [76] NDataDecl ::= S 'NDATA' S Name
5326 *
5327 * [ VC: Notation Declared ]
5328 * The Name must match the declared name of a notation.
5329 */
5330
5331void
5332xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005333 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005334 xmlChar *value = NULL;
5335 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005336 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005337 int isParameter = 0;
5338 xmlChar *orig = NULL;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005339
Daniel Veillard4c778d82005-01-23 17:37:44 +00005340 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005341 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005342 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005343 SHRINK;
5344 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005345 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005346 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5347 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005348 }
Owen Taylor3473f882001-02-23 17:55:21 +00005349
5350 if (RAW == '%') {
5351 NEXT;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005352 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005353 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
David Kilzer4472c3a2016-05-13 15:13:17 +08005354 "Space required after '%%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005355 }
Owen Taylor3473f882001-02-23 17:55:21 +00005356 isParameter = 1;
5357 }
5358
Daniel Veillard76d66f42001-05-16 21:05:17 +00005359 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005360 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005361 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5362 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005363 return;
5364 }
Daniel Veillard37334572008-07-31 08:20:02 +00005365 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005366 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005367 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005368 name, NULL, NULL);
5369 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005370 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005371 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5372 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005373 }
Owen Taylor3473f882001-02-23 17:55:21 +00005374
Daniel Veillardf5582f12002-06-11 10:08:16 +00005375 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005376 /*
5377 * handle the various case of definitions...
5378 */
5379 if (isParameter) {
5380 if ((RAW == '"') || (RAW == '\'')) {
5381 value = xmlParseEntityValue(ctxt, &orig);
5382 if (value) {
5383 if ((ctxt->sax != NULL) &&
5384 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5385 ctxt->sax->entityDecl(ctxt->userData, name,
5386 XML_INTERNAL_PARAMETER_ENTITY,
5387 NULL, NULL, value);
5388 }
5389 } else {
5390 URI = xmlParseExternalID(ctxt, &literal, 1);
5391 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005392 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005393 }
5394 if (URI) {
5395 xmlURIPtr uri;
5396
5397 uri = xmlParseURI((const char *) URI);
5398 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005399 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5400 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005401 /*
5402 * This really ought to be a well formedness error
5403 * but the XML Core WG decided otherwise c.f. issue
5404 * E26 of the XML erratas.
5405 */
Owen Taylor3473f882001-02-23 17:55:21 +00005406 } else {
5407 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005408 /*
5409 * Okay this is foolish to block those but not
5410 * invalid URIs.
5411 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005412 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005413 } else {
5414 if ((ctxt->sax != NULL) &&
5415 (!ctxt->disableSAX) &&
5416 (ctxt->sax->entityDecl != NULL))
5417 ctxt->sax->entityDecl(ctxt->userData, name,
5418 XML_EXTERNAL_PARAMETER_ENTITY,
5419 literal, URI, NULL);
5420 }
5421 xmlFreeURI(uri);
5422 }
5423 }
5424 }
5425 } else {
5426 if ((RAW == '"') || (RAW == '\'')) {
5427 value = xmlParseEntityValue(ctxt, &orig);
5428 if ((ctxt->sax != NULL) &&
5429 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5430 ctxt->sax->entityDecl(ctxt->userData, name,
5431 XML_INTERNAL_GENERAL_ENTITY,
5432 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005433 /*
5434 * For expat compatibility in SAX mode.
5435 */
5436 if ((ctxt->myDoc == NULL) ||
5437 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5438 if (ctxt->myDoc == NULL) {
5439 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005440 if (ctxt->myDoc == NULL) {
5441 xmlErrMemory(ctxt, "New Doc failed");
5442 return;
5443 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005444 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005445 }
5446 if (ctxt->myDoc->intSubset == NULL)
5447 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5448 BAD_CAST "fake", NULL, NULL);
5449
Daniel Veillard1af9a412003-08-20 22:54:39 +00005450 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5451 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005452 }
Owen Taylor3473f882001-02-23 17:55:21 +00005453 } else {
5454 URI = xmlParseExternalID(ctxt, &literal, 1);
5455 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005456 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005457 }
5458 if (URI) {
5459 xmlURIPtr uri;
5460
5461 uri = xmlParseURI((const char *)URI);
5462 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005463 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5464 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005465 /*
5466 * This really ought to be a well formedness error
5467 * but the XML Core WG decided otherwise c.f. issue
5468 * E26 of the XML erratas.
5469 */
Owen Taylor3473f882001-02-23 17:55:21 +00005470 } else {
5471 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005472 /*
5473 * Okay this is foolish to block those but not
5474 * invalid URIs.
5475 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005476 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005477 }
5478 xmlFreeURI(uri);
5479 }
5480 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005481 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005482 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5483 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005484 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005485 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005486 SKIP(5);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005487 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5489 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005490 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005491 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005492 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5493 (ctxt->sax->unparsedEntityDecl != NULL))
5494 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5495 literal, URI, ndata);
5496 } else {
5497 if ((ctxt->sax != NULL) &&
5498 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5499 ctxt->sax->entityDecl(ctxt->userData, name,
5500 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5501 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005502 /*
5503 * For expat compatibility in SAX mode.
5504 * assuming the entity repalcement was asked for
5505 */
5506 if ((ctxt->replaceEntities != 0) &&
5507 ((ctxt->myDoc == NULL) ||
5508 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5509 if (ctxt->myDoc == NULL) {
5510 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005511 if (ctxt->myDoc == NULL) {
5512 xmlErrMemory(ctxt, "New Doc failed");
5513 return;
5514 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005515 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005516 }
5517
5518 if (ctxt->myDoc->intSubset == NULL)
5519 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5520 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005521 xmlSAX2EntityDecl(ctxt, name,
5522 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5523 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005524 }
Owen Taylor3473f882001-02-23 17:55:21 +00005525 }
5526 }
5527 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005528 if (ctxt->instate == XML_PARSER_EOF)
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005529 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00005530 SKIP_BLANKS;
5531 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005532 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005533 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005534 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005535 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005536 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005537 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005538 "Entity declaration doesn't start and stop in"
5539 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005540 }
5541 NEXT;
5542 }
5543 if (orig != NULL) {
5544 /*
5545 * Ugly mechanism to save the raw entity value.
5546 */
5547 xmlEntityPtr cur = NULL;
5548
5549 if (isParameter) {
5550 if ((ctxt->sax != NULL) &&
5551 (ctxt->sax->getParameterEntity != NULL))
5552 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5553 } else {
5554 if ((ctxt->sax != NULL) &&
5555 (ctxt->sax->getEntity != NULL))
5556 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005557 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005558 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005559 }
Owen Taylor3473f882001-02-23 17:55:21 +00005560 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005561 if ((cur != NULL) && (cur->orig == NULL)) {
5562 cur->orig = orig;
5563 orig = NULL;
5564 }
Owen Taylor3473f882001-02-23 17:55:21 +00005565 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005566
5567done:
Owen Taylor3473f882001-02-23 17:55:21 +00005568 if (value != NULL) xmlFree(value);
5569 if (URI != NULL) xmlFree(URI);
5570 if (literal != NULL) xmlFree(literal);
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005571 if (orig != NULL) xmlFree(orig);
Owen Taylor3473f882001-02-23 17:55:21 +00005572 }
5573}
5574
5575/**
5576 * xmlParseDefaultDecl:
5577 * @ctxt: an XML parser context
5578 * @value: Receive a possible fixed default value for the attribute
5579 *
5580 * Parse an attribute default declaration
5581 *
5582 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5583 *
5584 * [ VC: Required Attribute ]
5585 * if the default declaration is the keyword #REQUIRED, then the
5586 * attribute must be specified for all elements of the type in the
5587 * attribute-list declaration.
5588 *
5589 * [ VC: Attribute Default Legal ]
5590 * The declared default value must meet the lexical constraints of
5591 * the declared attribute type c.f. xmlValidateAttributeDecl()
5592 *
5593 * [ VC: Fixed Attribute Default ]
5594 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005595 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005596 *
5597 * [ WFC: No < in Attribute Values ]
5598 * handled in xmlParseAttValue()
5599 *
5600 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005601 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005602 */
5603
5604int
5605xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5606 int val;
5607 xmlChar *ret;
5608
5609 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005610 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005611 SKIP(9);
5612 return(XML_ATTRIBUTE_REQUIRED);
5613 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005614 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005615 SKIP(8);
5616 return(XML_ATTRIBUTE_IMPLIED);
5617 }
5618 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005619 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005620 SKIP(6);
5621 val = XML_ATTRIBUTE_FIXED;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005622 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005623 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5624 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005625 }
Owen Taylor3473f882001-02-23 17:55:21 +00005626 }
5627 ret = xmlParseAttValue(ctxt);
5628 ctxt->instate = XML_PARSER_DTD;
5629 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005630 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005631 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005632 } else
5633 *value = ret;
5634 return(val);
5635}
5636
5637/**
5638 * xmlParseNotationType:
5639 * @ctxt: an XML parser context
5640 *
5641 * parse an Notation attribute type.
5642 *
5643 * Note: the leading 'NOTATION' S part has already being parsed...
5644 *
5645 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5646 *
5647 * [ VC: Notation Attributes ]
5648 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005649 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005650 *
5651 * Returns: the notation attribute tree built while parsing
5652 */
5653
5654xmlEnumerationPtr
5655xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005656 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005657 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005658
5659 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005660 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005661 return(NULL);
5662 }
5663 SHRINK;
5664 do {
5665 NEXT;
5666 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005667 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005668 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005669 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5670 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005671 xmlFreeEnumeration(ret);
5672 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005673 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005674 tmp = ret;
5675 while (tmp != NULL) {
5676 if (xmlStrEqual(name, tmp->name)) {
5677 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5678 "standalone: attribute notation value token %s duplicated\n",
5679 name, NULL);
5680 if (!xmlDictOwns(ctxt->dict, name))
5681 xmlFree((xmlChar *) name);
5682 break;
5683 }
5684 tmp = tmp->next;
5685 }
5686 if (tmp == NULL) {
5687 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005688 if (cur == NULL) {
5689 xmlFreeEnumeration(ret);
5690 return(NULL);
5691 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005692 if (last == NULL) ret = last = cur;
5693 else {
5694 last->next = cur;
5695 last = cur;
5696 }
Owen Taylor3473f882001-02-23 17:55:21 +00005697 }
5698 SKIP_BLANKS;
5699 } while (RAW == '|');
5700 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005701 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005702 xmlFreeEnumeration(ret);
5703 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005704 }
5705 NEXT;
5706 return(ret);
5707}
5708
5709/**
5710 * xmlParseEnumerationType:
5711 * @ctxt: an XML parser context
5712 *
5713 * parse an Enumeration attribute type.
5714 *
5715 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5716 *
5717 * [ VC: Enumeration ]
5718 * Values of this type must match one of the Nmtoken tokens in
5719 * the declaration
5720 *
5721 * Returns: the enumeration attribute tree built while parsing
5722 */
5723
5724xmlEnumerationPtr
5725xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5726 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005727 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005728
5729 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005730 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005731 return(NULL);
5732 }
5733 SHRINK;
5734 do {
5735 NEXT;
5736 SKIP_BLANKS;
5737 name = xmlParseNmtoken(ctxt);
5738 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005739 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005740 return(ret);
5741 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005742 tmp = ret;
5743 while (tmp != NULL) {
5744 if (xmlStrEqual(name, tmp->name)) {
5745 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5746 "standalone: attribute enumeration value token %s duplicated\n",
5747 name, NULL);
5748 if (!xmlDictOwns(ctxt->dict, name))
5749 xmlFree(name);
5750 break;
5751 }
5752 tmp = tmp->next;
5753 }
5754 if (tmp == NULL) {
5755 cur = xmlCreateEnumeration(name);
5756 if (!xmlDictOwns(ctxt->dict, name))
5757 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005758 if (cur == NULL) {
5759 xmlFreeEnumeration(ret);
5760 return(NULL);
5761 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005762 if (last == NULL) ret = last = cur;
5763 else {
5764 last->next = cur;
5765 last = cur;
5766 }
Owen Taylor3473f882001-02-23 17:55:21 +00005767 }
5768 SKIP_BLANKS;
5769 } while (RAW == '|');
5770 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005771 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005772 return(ret);
5773 }
5774 NEXT;
5775 return(ret);
5776}
5777
5778/**
5779 * xmlParseEnumeratedType:
5780 * @ctxt: an XML parser context
5781 * @tree: the enumeration tree built while parsing
5782 *
5783 * parse an Enumerated attribute type.
5784 *
5785 * [57] EnumeratedType ::= NotationType | Enumeration
5786 *
5787 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5788 *
5789 *
5790 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5791 */
5792
5793int
5794xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005795 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005796 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005797 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005798 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5799 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005800 return(0);
5801 }
Owen Taylor3473f882001-02-23 17:55:21 +00005802 *tree = xmlParseNotationType(ctxt);
5803 if (*tree == NULL) return(0);
5804 return(XML_ATTRIBUTE_NOTATION);
5805 }
5806 *tree = xmlParseEnumerationType(ctxt);
5807 if (*tree == NULL) return(0);
5808 return(XML_ATTRIBUTE_ENUMERATION);
5809}
5810
5811/**
5812 * xmlParseAttributeType:
5813 * @ctxt: an XML parser context
5814 * @tree: the enumeration tree built while parsing
5815 *
5816 * parse the Attribute list def for an element
5817 *
5818 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5819 *
5820 * [55] StringType ::= 'CDATA'
5821 *
5822 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5823 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5824 *
5825 * Validity constraints for attribute values syntax are checked in
5826 * xmlValidateAttributeValue()
5827 *
5828 * [ VC: ID ]
5829 * Values of type ID must match the Name production. A name must not
5830 * appear more than once in an XML document as a value of this type;
5831 * i.e., ID values must uniquely identify the elements which bear them.
5832 *
5833 * [ VC: One ID per Element Type ]
5834 * No element type may have more than one ID attribute specified.
5835 *
5836 * [ VC: ID Attribute Default ]
5837 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5838 *
5839 * [ VC: IDREF ]
5840 * Values of type IDREF must match the Name production, and values
5841 * of type IDREFS must match Names; each IDREF Name must match the value
5842 * of an ID attribute on some element in the XML document; i.e. IDREF
5843 * values must match the value of some ID attribute.
5844 *
5845 * [ VC: Entity Name ]
5846 * Values of type ENTITY must match the Name production, values
5847 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005848 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005849 *
5850 * [ VC: Name Token ]
5851 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005852 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005853 *
5854 * Returns the attribute type
5855 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005856int
Owen Taylor3473f882001-02-23 17:55:21 +00005857xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5858 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005859 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005860 SKIP(5);
5861 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005862 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005863 SKIP(6);
5864 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005865 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005866 SKIP(5);
5867 return(XML_ATTRIBUTE_IDREF);
5868 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5869 SKIP(2);
5870 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005871 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005872 SKIP(6);
5873 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005874 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005875 SKIP(8);
5876 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005877 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005878 SKIP(8);
5879 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005880 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005881 SKIP(7);
5882 return(XML_ATTRIBUTE_NMTOKEN);
5883 }
5884 return(xmlParseEnumeratedType(ctxt, tree));
5885}
5886
5887/**
5888 * xmlParseAttributeListDecl:
5889 * @ctxt: an XML parser context
5890 *
5891 * : parse the Attribute list def for an element
5892 *
5893 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5894 *
5895 * [53] AttDef ::= S Name S AttType S DefaultDecl
5896 *
5897 */
5898void
5899xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005900 const xmlChar *elemName;
5901 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005902 xmlEnumerationPtr tree;
5903
Daniel Veillarda07050d2003-10-19 14:46:32 +00005904 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005905 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005906
5907 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005908 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005909 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005910 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005911 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005912 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005913 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005914 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5915 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005916 return;
5917 }
5918 SKIP_BLANKS;
5919 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005920 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005921 int type;
5922 int def;
5923 xmlChar *defaultValue = NULL;
5924
5925 GROW;
5926 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005927 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005928 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005929 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5930 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005931 break;
5932 }
5933 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005934 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005936 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005937 break;
5938 }
Owen Taylor3473f882001-02-23 17:55:21 +00005939
5940 type = xmlParseAttributeType(ctxt, &tree);
5941 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005942 break;
5943 }
5944
5945 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005946 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005947 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5948 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005949 if (tree != NULL)
5950 xmlFreeEnumeration(tree);
5951 break;
5952 }
Owen Taylor3473f882001-02-23 17:55:21 +00005953
5954 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5955 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005956 if (defaultValue != NULL)
5957 xmlFree(defaultValue);
5958 if (tree != NULL)
5959 xmlFreeEnumeration(tree);
5960 break;
5961 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005962 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5963 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005964
5965 GROW;
5966 if (RAW != '>') {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005967 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005968 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005969 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005970 if (defaultValue != NULL)
5971 xmlFree(defaultValue);
5972 if (tree != NULL)
5973 xmlFreeEnumeration(tree);
5974 break;
5975 }
Owen Taylor3473f882001-02-23 17:55:21 +00005976 }
Owen Taylor3473f882001-02-23 17:55:21 +00005977 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5978 (ctxt->sax->attributeDecl != NULL))
5979 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5980 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005981 else if (tree != NULL)
5982 xmlFreeEnumeration(tree);
5983
5984 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005985 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00005986 (def != XML_ATTRIBUTE_REQUIRED)) {
5987 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5988 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005989 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005990 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5991 }
Owen Taylor3473f882001-02-23 17:55:21 +00005992 if (defaultValue != NULL)
5993 xmlFree(defaultValue);
5994 GROW;
5995 }
5996 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005997 if (inputid != ctxt->input->id) {
5998 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5999 "Attribute list declaration doesn't start and"
6000 " stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006001 }
6002 NEXT;
6003 }
Owen Taylor3473f882001-02-23 17:55:21 +00006004 }
6005}
6006
6007/**
6008 * xmlParseElementMixedContentDecl:
6009 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006010 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006011 *
6012 * parse the declaration for a Mixed Element content
6013 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006014 *
Owen Taylor3473f882001-02-23 17:55:21 +00006015 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6016 * '(' S? '#PCDATA' S? ')'
6017 *
6018 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6019 *
6020 * [ VC: No Duplicate Types ]
6021 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006022 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006023 *
6024 * returns: the list of the xmlElementContentPtr describing the element choices
6025 */
6026xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006027xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006028 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006029 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006030
6031 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006032 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006033 SKIP(7);
6034 SKIP_BLANKS;
6035 SHRINK;
6036 if (RAW == ')') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006037 if (ctxt->input->id != inputchk) {
6038 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6039 "Element content declaration doesn't start and"
6040 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006041 }
Owen Taylor3473f882001-02-23 17:55:21 +00006042 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006043 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006044 if (ret == NULL)
6045 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006046 if (RAW == '*') {
6047 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6048 NEXT;
6049 }
6050 return(ret);
6051 }
6052 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006053 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006054 if (ret == NULL) return(NULL);
6055 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006056 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006057 NEXT;
6058 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006059 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006060 if (ret == NULL) return(NULL);
6061 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006062 if (cur != NULL)
6063 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006064 cur = ret;
6065 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006066 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006067 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006068 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006069 if (n->c1 != NULL)
6070 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006071 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006072 if (n != NULL)
6073 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006074 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006075 }
6076 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006077 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006078 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006079 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006080 "xmlParseElementMixedContentDecl : Name expected\n");
Nick Wellnhofer8627e4e2017-05-23 18:11:08 +02006081 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006082 return(NULL);
6083 }
6084 SKIP_BLANKS;
6085 GROW;
6086 }
6087 if ((RAW == ')') && (NXT(1) == '*')) {
6088 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006089 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006090 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006091 if (cur->c2 != NULL)
6092 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006093 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006094 if (ret != NULL)
6095 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006096 if (ctxt->input->id != inputchk) {
6097 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6098 "Element content declaration doesn't start and"
6099 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006100 }
Owen Taylor3473f882001-02-23 17:55:21 +00006101 SKIP(2);
6102 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006103 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006104 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006105 return(NULL);
6106 }
6107
6108 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006109 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006110 }
6111 return(ret);
6112}
6113
6114/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006115 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006116 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006117 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006118 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006119 *
6120 * parse the declaration for a Mixed Element content
6121 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006122 *
Owen Taylor3473f882001-02-23 17:55:21 +00006123 *
6124 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6125 *
6126 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6127 *
6128 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6129 *
6130 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6131 *
6132 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6133 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006134 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006135 * opening or closing parentheses in a choice, seq, or Mixed
6136 * construct is contained in the replacement text for a parameter
6137 * entity, both must be contained in the same replacement text. For
6138 * interoperability, if a parameter-entity reference appears in a
6139 * choice, seq, or Mixed construct, its replacement text should not
6140 * be empty, and neither the first nor last non-blank character of
6141 * the replacement text should be a connector (| or ,).
6142 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006143 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006144 * hierarchy.
6145 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006146static xmlElementContentPtr
6147xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6148 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006149 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006150 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006151 xmlChar type = 0;
6152
Daniel Veillard489f9672009-08-10 16:49:30 +02006153 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6154 (depth > 2048)) {
6155 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6156"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6157 depth);
6158 return(NULL);
6159 }
Owen Taylor3473f882001-02-23 17:55:21 +00006160 SKIP_BLANKS;
6161 GROW;
6162 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006163 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006164
Owen Taylor3473f882001-02-23 17:55:21 +00006165 /* Recurse on first child */
6166 NEXT;
6167 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006168 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6169 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006170 SKIP_BLANKS;
6171 GROW;
6172 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006173 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006174 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006175 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006176 return(NULL);
6177 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006178 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006179 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006180 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006181 return(NULL);
6182 }
Owen Taylor3473f882001-02-23 17:55:21 +00006183 GROW;
6184 if (RAW == '?') {
6185 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6186 NEXT;
6187 } else if (RAW == '*') {
6188 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6189 NEXT;
6190 } else if (RAW == '+') {
6191 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6192 NEXT;
6193 } else {
6194 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6195 }
Owen Taylor3473f882001-02-23 17:55:21 +00006196 GROW;
6197 }
6198 SKIP_BLANKS;
6199 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006200 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006201 /*
6202 * Each loop we parse one separator and one element.
6203 */
6204 if (RAW == ',') {
6205 if (type == 0) type = CUR;
6206
6207 /*
6208 * Detect "Name | Name , Name" error
6209 */
6210 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006211 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006212 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006213 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006214 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006215 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006216 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006217 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006218 return(NULL);
6219 }
6220 NEXT;
6221
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006222 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006223 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006224 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006225 xmlFreeDocElementContent(ctxt->myDoc, last);
6226 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006227 return(NULL);
6228 }
6229 if (last == NULL) {
6230 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006231 if (ret != NULL)
6232 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006233 ret = cur = op;
6234 } else {
6235 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006236 if (op != NULL)
6237 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006238 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006239 if (last != NULL)
6240 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006241 cur =op;
6242 last = NULL;
6243 }
6244 } else if (RAW == '|') {
6245 if (type == 0) type = CUR;
6246
6247 /*
6248 * Detect "Name , Name | Name" error
6249 */
6250 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006251 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006252 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006253 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006254 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006255 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006256 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006257 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006258 return(NULL);
6259 }
6260 NEXT;
6261
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006262 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006263 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006264 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006265 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006266 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006267 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006268 return(NULL);
6269 }
6270 if (last == NULL) {
6271 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006272 if (ret != NULL)
6273 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006274 ret = cur = op;
6275 } else {
6276 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006277 if (op != NULL)
6278 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006279 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006280 if (last != NULL)
6281 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006282 cur =op;
6283 last = NULL;
6284 }
6285 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006286 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006287 if ((last != NULL) && (last != ret))
6288 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006289 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006290 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006291 return(NULL);
6292 }
6293 GROW;
6294 SKIP_BLANKS;
6295 GROW;
6296 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006297 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006298 /* Recurse on second child */
6299 NEXT;
6300 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006301 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6302 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006303 SKIP_BLANKS;
6304 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006305 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006306 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006307 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006308 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006309 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006310 return(NULL);
6311 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006312 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006313 if (last == NULL) {
6314 if (ret != NULL)
6315 xmlFreeDocElementContent(ctxt->myDoc, ret);
6316 return(NULL);
6317 }
Owen Taylor3473f882001-02-23 17:55:21 +00006318 if (RAW == '?') {
6319 last->ocur = XML_ELEMENT_CONTENT_OPT;
6320 NEXT;
6321 } else if (RAW == '*') {
6322 last->ocur = XML_ELEMENT_CONTENT_MULT;
6323 NEXT;
6324 } else if (RAW == '+') {
6325 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6326 NEXT;
6327 } else {
6328 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6329 }
6330 }
6331 SKIP_BLANKS;
6332 GROW;
6333 }
6334 if ((cur != NULL) && (last != NULL)) {
6335 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006336 if (last != NULL)
6337 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006338 }
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006339 if (ctxt->input->id != inputchk) {
6340 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6341 "Element content declaration doesn't start and stop in"
6342 " the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006343 }
Owen Taylor3473f882001-02-23 17:55:21 +00006344 NEXT;
6345 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006346 if (ret != NULL) {
6347 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6348 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6349 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6350 else
6351 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6352 }
Owen Taylor3473f882001-02-23 17:55:21 +00006353 NEXT;
6354 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006355 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006356 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006357 cur = ret;
6358 /*
6359 * Some normalization:
6360 * (a | b* | c?)* == (a | b | c)*
6361 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006362 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006363 if ((cur->c1 != NULL) &&
6364 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6365 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6366 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6367 if ((cur->c2 != NULL) &&
6368 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6369 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6370 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6371 cur = cur->c2;
6372 }
6373 }
Owen Taylor3473f882001-02-23 17:55:21 +00006374 NEXT;
6375 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006376 if (ret != NULL) {
6377 int found = 0;
6378
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006379 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6380 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6381 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006382 else
6383 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006384 /*
6385 * Some normalization:
6386 * (a | b*)+ == (a | b)*
6387 * (a | b?)+ == (a | b)*
6388 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006389 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006390 if ((cur->c1 != NULL) &&
6391 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6392 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6393 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6394 found = 1;
6395 }
6396 if ((cur->c2 != NULL) &&
6397 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6398 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6399 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6400 found = 1;
6401 }
6402 cur = cur->c2;
6403 }
6404 if (found)
6405 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6406 }
Owen Taylor3473f882001-02-23 17:55:21 +00006407 NEXT;
6408 }
6409 return(ret);
6410}
6411
6412/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006413 * xmlParseElementChildrenContentDecl:
6414 * @ctxt: an XML parser context
6415 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006416 *
6417 * parse the declaration for a Mixed Element content
6418 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6419 *
6420 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6421 *
6422 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6423 *
6424 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6425 *
6426 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6427 *
6428 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6429 * TODO Parameter-entity replacement text must be properly nested
6430 * with parenthesized groups. That is to say, if either of the
6431 * opening or closing parentheses in a choice, seq, or Mixed
6432 * construct is contained in the replacement text for a parameter
6433 * entity, both must be contained in the same replacement text. For
6434 * interoperability, if a parameter-entity reference appears in a
6435 * choice, seq, or Mixed construct, its replacement text should not
6436 * be empty, and neither the first nor last non-blank character of
6437 * the replacement text should be a connector (| or ,).
6438 *
6439 * Returns the tree of xmlElementContentPtr describing the element
6440 * hierarchy.
6441 */
6442xmlElementContentPtr
6443xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6444 /* stub left for API/ABI compat */
6445 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6446}
6447
6448/**
Owen Taylor3473f882001-02-23 17:55:21 +00006449 * xmlParseElementContentDecl:
6450 * @ctxt: an XML parser context
6451 * @name: the name of the element being defined.
6452 * @result: the Element Content pointer will be stored here if any
6453 *
6454 * parse the declaration for an Element content either Mixed or Children,
6455 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006456 *
Owen Taylor3473f882001-02-23 17:55:21 +00006457 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6458 *
6459 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6460 */
6461
6462int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006463xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006464 xmlElementContentPtr *result) {
6465
6466 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006467 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006468 int res;
6469
6470 *result = NULL;
6471
6472 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006473 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006474 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006475 return(-1);
6476 }
6477 NEXT;
6478 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006479 if (ctxt->instate == XML_PARSER_EOF)
6480 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006481 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006482 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006483 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006484 res = XML_ELEMENT_TYPE_MIXED;
6485 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006486 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006487 res = XML_ELEMENT_TYPE_ELEMENT;
6488 }
Owen Taylor3473f882001-02-23 17:55:21 +00006489 SKIP_BLANKS;
6490 *result = tree;
6491 return(res);
6492}
6493
6494/**
6495 * xmlParseElementDecl:
6496 * @ctxt: an XML parser context
6497 *
6498 * parse an Element declaration.
6499 *
6500 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6501 *
6502 * [ VC: Unique Element Type Declaration ]
6503 * No element type may be declared more than once
6504 *
6505 * Returns the type of the element, or -1 in case of error
6506 */
6507int
6508xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006509 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006510 int ret = -1;
6511 xmlElementContentPtr content = NULL;
6512
Daniel Veillard4c778d82005-01-23 17:37:44 +00006513 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006514 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006515 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006516
6517 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006518 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006519 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6520 "Space required after 'ELEMENT'\n");
David Kilzer00906752016-01-26 16:57:03 -08006521 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006522 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00006523 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006524 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006525 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6526 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006527 return(-1);
6528 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006529 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6531 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006532 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00006533 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006534 SKIP(5);
6535 /*
6536 * Element must always be empty.
6537 */
6538 ret = XML_ELEMENT_TYPE_EMPTY;
6539 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6540 (NXT(2) == 'Y')) {
6541 SKIP(3);
6542 /*
6543 * Element is a generic container.
6544 */
6545 ret = XML_ELEMENT_TYPE_ANY;
6546 } else if (RAW == '(') {
6547 ret = xmlParseElementContentDecl(ctxt, name, &content);
6548 } else {
6549 /*
6550 * [ WFC: PEs in Internal Subset ] error handling.
6551 */
6552 if ((RAW == '%') && (ctxt->external == 0) &&
6553 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006554 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006555 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006556 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006557 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006558 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6559 }
Owen Taylor3473f882001-02-23 17:55:21 +00006560 return(-1);
6561 }
6562
6563 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006564
6565 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006566 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006567 if (content != NULL) {
6568 xmlFreeDocElementContent(ctxt->myDoc, content);
6569 }
Owen Taylor3473f882001-02-23 17:55:21 +00006570 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006571 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006572 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006573 "Element declaration doesn't start and stop in"
6574 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006575 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006576
Owen Taylor3473f882001-02-23 17:55:21 +00006577 NEXT;
6578 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006579 (ctxt->sax->elementDecl != NULL)) {
6580 if (content != NULL)
6581 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006582 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6583 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006584 if ((content != NULL) && (content->parent == NULL)) {
6585 /*
6586 * this is a trick: if xmlAddElementDecl is called,
6587 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006588 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006589 * interfaces or change the API/ABI
6590 */
6591 xmlFreeDocElementContent(ctxt->myDoc, content);
6592 }
6593 } else if (content != NULL) {
6594 xmlFreeDocElementContent(ctxt->myDoc, content);
6595 }
Owen Taylor3473f882001-02-23 17:55:21 +00006596 }
Owen Taylor3473f882001-02-23 17:55:21 +00006597 }
6598 return(ret);
6599}
6600
6601/**
Owen Taylor3473f882001-02-23 17:55:21 +00006602 * xmlParseConditionalSections
6603 * @ctxt: an XML parser context
6604 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006605 * [61] conditionalSect ::= includeSect | ignoreSect
6606 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006607 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6608 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6609 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6610 */
6611
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006612static void
Owen Taylor3473f882001-02-23 17:55:21 +00006613xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006614 int id = ctxt->input->id;
6615
Owen Taylor3473f882001-02-23 17:55:21 +00006616 SKIP(3);
6617 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006618 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006619 SKIP(7);
6620 SKIP_BLANKS;
6621 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006622 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006623 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006624 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006625 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006626 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006627 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6628 "All markup of the conditional section is not"
6629 " in the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006630 }
Owen Taylor3473f882001-02-23 17:55:21 +00006631 NEXT;
6632 }
6633 if (xmlParserDebugEntities) {
6634 if ((ctxt->input != NULL) && (ctxt->input->filename))
6635 xmlGenericError(xmlGenericErrorContext,
6636 "%s(%d): ", ctxt->input->filename,
6637 ctxt->input->line);
6638 xmlGenericError(xmlGenericErrorContext,
6639 "Entering INCLUDE Conditional Section\n");
6640 }
6641
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006642 SKIP_BLANKS;
6643 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006644 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6645 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006646 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006647 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006648
6649 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6650 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006651 } else
6652 xmlParseMarkupDecl(ctxt);
6653
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006654 SKIP_BLANKS;
6655 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006656
Daniel Veillardfdc91562002-07-01 21:52:03 +00006657 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006658 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
David Kilzer00906752016-01-26 16:57:03 -08006659 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006660 break;
6661 }
6662 }
6663 if (xmlParserDebugEntities) {
6664 if ((ctxt->input != NULL) && (ctxt->input->filename))
6665 xmlGenericError(xmlGenericErrorContext,
6666 "%s(%d): ", ctxt->input->filename,
6667 ctxt->input->line);
6668 xmlGenericError(xmlGenericErrorContext,
6669 "Leaving INCLUDE Conditional Section\n");
6670 }
6671
Daniel Veillarda07050d2003-10-19 14:46:32 +00006672 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006673 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006674 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006675 int depth = 0;
6676
6677 SKIP(6);
6678 SKIP_BLANKS;
6679 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006680 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006681 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006682 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006683 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006684 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006685 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6686 "All markup of the conditional section is not"
6687 " in the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006688 }
Owen Taylor3473f882001-02-23 17:55:21 +00006689 NEXT;
6690 }
6691 if (xmlParserDebugEntities) {
6692 if ((ctxt->input != NULL) && (ctxt->input->filename))
6693 xmlGenericError(xmlGenericErrorContext,
6694 "%s(%d): ", ctxt->input->filename,
6695 ctxt->input->line);
6696 xmlGenericError(xmlGenericErrorContext,
6697 "Entering IGNORE Conditional Section\n");
6698 }
6699
6700 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006701 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006702 * But disable SAX event generating DTD building in the meantime
6703 */
6704 state = ctxt->disableSAX;
6705 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006706 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006707 ctxt->instate = XML_PARSER_IGNORE;
6708
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006709 while (((depth >= 0) && (RAW != 0)) &&
6710 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006711 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6712 depth++;
6713 SKIP(3);
6714 continue;
6715 }
6716 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6717 if (--depth >= 0) SKIP(3);
6718 continue;
6719 }
6720 NEXT;
6721 continue;
6722 }
6723
6724 ctxt->disableSAX = state;
6725 ctxt->instate = instate;
6726
6727 if (xmlParserDebugEntities) {
6728 if ((ctxt->input != NULL) && (ctxt->input->filename))
6729 xmlGenericError(xmlGenericErrorContext,
6730 "%s(%d): ", ctxt->input->filename,
6731 ctxt->input->line);
6732 xmlGenericError(xmlGenericErrorContext,
6733 "Leaving IGNORE Conditional Section\n");
6734 }
6735
6736 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006737 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006738 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006739 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006740 }
6741
6742 if (RAW == 0)
6743 SHRINK;
6744
6745 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006746 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006747 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006748 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006749 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6750 "All markup of the conditional section is not in"
6751 " the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006752 }
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006753 if ((ctxt-> instate != XML_PARSER_EOF) &&
Daniel Veillard41ac9042015-10-27 10:53:44 +08006754 ((ctxt->input->cur + 3) <= ctxt->input->end))
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006755 SKIP(3);
Owen Taylor3473f882001-02-23 17:55:21 +00006756 }
6757}
6758
6759/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006760 * xmlParseMarkupDecl:
6761 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006762 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006763 * parse Markup declarations
6764 *
6765 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6766 * NotationDecl | PI | Comment
6767 *
6768 * [ VC: Proper Declaration/PE Nesting ]
6769 * Parameter-entity replacement text must be properly nested with
6770 * markup declarations. That is to say, if either the first character
6771 * or the last character of a markup declaration (markupdecl above) is
6772 * contained in the replacement text for a parameter-entity reference,
6773 * both must be contained in the same replacement text.
6774 *
6775 * [ WFC: PEs in Internal Subset ]
6776 * In the internal DTD subset, parameter-entity references can occur
6777 * only where markup declarations can occur, not within markup declarations.
6778 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006779 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006780 */
6781void
6782xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6783 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006784 if (CUR == '<') {
6785 if (NXT(1) == '!') {
6786 switch (NXT(2)) {
6787 case 'E':
6788 if (NXT(3) == 'L')
6789 xmlParseElementDecl(ctxt);
6790 else if (NXT(3) == 'N')
6791 xmlParseEntityDecl(ctxt);
6792 break;
6793 case 'A':
6794 xmlParseAttributeListDecl(ctxt);
6795 break;
6796 case 'N':
6797 xmlParseNotationDecl(ctxt);
6798 break;
6799 case '-':
6800 xmlParseComment(ctxt);
6801 break;
6802 default:
6803 /* there is an error but it will be detected later */
6804 break;
6805 }
6806 } else if (NXT(1) == '?') {
6807 xmlParsePI(ctxt);
6808 }
6809 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006810
6811 /*
6812 * detect requirement to exit there and act accordingly
6813 * and avoid having instate overriden later on
6814 */
6815 if (ctxt->instate == XML_PARSER_EOF)
6816 return;
6817
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006818 /*
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006819 * Conditional sections are allowed from entities included
6820 * by PE References in the internal subset.
6821 */
6822 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6823 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6824 xmlParseConditionalSections(ctxt);
6825 }
6826 }
6827
6828 ctxt->instate = XML_PARSER_DTD;
6829}
6830
6831/**
6832 * xmlParseTextDecl:
6833 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006834 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006835 * parse an XML declaration header for external entities
6836 *
6837 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006838 */
6839
6840void
6841xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6842 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006843 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006844
6845 /*
6846 * We know that '<?xml' is here.
6847 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006848 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006849 SKIP(5);
6850 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006851 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006852 return;
6853 }
6854
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006855 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006856 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6857 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006858 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006859
6860 /*
6861 * We may have the VersionInfo here.
6862 */
6863 version = xmlParseVersionInfo(ctxt);
6864 if (version == NULL)
6865 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006866 else {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006867 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006868 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6869 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006870 }
6871 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006872 ctxt->input->version = version;
6873
6874 /*
6875 * We must have the encoding declaration
6876 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006877 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006878 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6879 /*
6880 * The XML REC instructs us to stop parsing right here
6881 */
6882 return;
6883 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006884 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6885 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6886 "Missing encoding in text declaration\n");
6887 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006888
6889 SKIP_BLANKS;
6890 if ((RAW == '?') && (NXT(1) == '>')) {
6891 SKIP(2);
6892 } else if (RAW == '>') {
6893 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006894 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006895 NEXT;
6896 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006897 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006898 MOVETO_ENDTAG(CUR_PTR);
6899 NEXT;
6900 }
6901}
6902
6903/**
Owen Taylor3473f882001-02-23 17:55:21 +00006904 * xmlParseExternalSubset:
6905 * @ctxt: an XML parser context
6906 * @ExternalID: the external identifier
6907 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006908 *
Owen Taylor3473f882001-02-23 17:55:21 +00006909 * parse Markup declarations from an external subset
6910 *
6911 * [30] extSubset ::= textDecl? extSubsetDecl
6912 *
6913 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6914 */
6915void
6916xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6917 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006918 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006919 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006920
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006921 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006922 (ctxt->input->end - ctxt->input->cur >= 4)) {
6923 xmlChar start[4];
6924 xmlCharEncoding enc;
6925
6926 start[0] = RAW;
6927 start[1] = NXT(1);
6928 start[2] = NXT(2);
6929 start[3] = NXT(3);
6930 enc = xmlDetectCharEncoding(start, 4);
6931 if (enc != XML_CHAR_ENCODING_NONE)
6932 xmlSwitchEncoding(ctxt, enc);
6933 }
6934
Daniel Veillarda07050d2003-10-19 14:46:32 +00006935 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006936 xmlParseTextDecl(ctxt);
6937 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6938 /*
6939 * The XML REC instructs us to stop parsing right here
6940 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08006941 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006942 return;
6943 }
6944 }
6945 if (ctxt->myDoc == NULL) {
6946 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006947 if (ctxt->myDoc == NULL) {
6948 xmlErrMemory(ctxt, "New Doc failed");
6949 return;
6950 }
6951 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006952 }
6953 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6954 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6955
6956 ctxt->instate = XML_PARSER_DTD;
6957 ctxt->external = 1;
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006958 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006959 while (((RAW == '<') && (NXT(1) == '?')) ||
6960 ((RAW == '<') && (NXT(1) == '!')) ||
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006961 (RAW == '%')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006962 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006963 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006964
6965 GROW;
6966 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6967 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006968 } else
6969 xmlParseMarkupDecl(ctxt);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006970 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006971
Daniel Veillardfdc91562002-07-01 21:52:03 +00006972 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006973 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006974 break;
6975 }
6976 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006977
Owen Taylor3473f882001-02-23 17:55:21 +00006978 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006979 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006980 }
6981
6982}
6983
6984/**
6985 * xmlParseReference:
6986 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006987 *
Owen Taylor3473f882001-02-23 17:55:21 +00006988 * parse and handle entity references in content, depending on the SAX
6989 * interface, this may end-up in a call to character() if this is a
6990 * CharRef, a predefined entity, if there is no reference() callback.
6991 * or if the parser was asked to switch to that mode.
6992 *
6993 * [67] Reference ::= EntityRef | CharRef
6994 */
6995void
6996xmlParseReference(xmlParserCtxtPtr ctxt) {
6997 xmlEntityPtr ent;
6998 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006999 int was_checked;
7000 xmlNodePtr list = NULL;
7001 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007002
Daniel Veillard0161e632008-08-28 15:36:32 +00007003
7004 if (RAW != '&')
7005 return;
7006
7007 /*
7008 * Simple case of a CharRef
7009 */
Owen Taylor3473f882001-02-23 17:55:21 +00007010 if (NXT(1) == '#') {
7011 int i = 0;
7012 xmlChar out[10];
7013 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007014 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007015
Daniel Veillarddc171602008-03-26 17:41:38 +00007016 if (value == 0)
7017 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007018 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7019 /*
7020 * So we are using non-UTF-8 buffers
7021 * Check that the char fit on 8bits, if not
7022 * generate a CharRef.
7023 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007024 if (value <= 0xFF) {
7025 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007026 out[1] = 0;
7027 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7028 (!ctxt->disableSAX))
7029 ctxt->sax->characters(ctxt->userData, out, 1);
7030 } else {
7031 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007032 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007033 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007034 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007035 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7036 (!ctxt->disableSAX))
7037 ctxt->sax->reference(ctxt->userData, out);
7038 }
7039 } else {
7040 /*
7041 * Just encode the value in UTF-8
7042 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007043 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007044 out[i] = 0;
7045 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7046 (!ctxt->disableSAX))
7047 ctxt->sax->characters(ctxt->userData, out, i);
7048 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007049 return;
7050 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007051
Daniel Veillard0161e632008-08-28 15:36:32 +00007052 /*
7053 * We are seeing an entity reference
7054 */
7055 ent = xmlParseEntityRef(ctxt);
7056 if (ent == NULL) return;
7057 if (!ctxt->wellFormed)
7058 return;
7059 was_checked = ent->checked;
7060
7061 /* special case of predefined entities */
7062 if ((ent->name == NULL) ||
7063 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7064 val = ent->content;
7065 if (val == NULL) return;
7066 /*
7067 * inline the entity.
7068 */
7069 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7070 (!ctxt->disableSAX))
7071 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7072 return;
7073 }
7074
7075 /*
7076 * The first reference to the entity trigger a parsing phase
7077 * where the ent->children is filled with the result from
7078 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007079 * Note: external parsed entities will not be loaded, it is not
7080 * required for a non-validating parser, unless the parsing option
7081 * of validating, or substituting entities were given. Doing so is
7082 * far more secure as the parser will only process data coming from
7083 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007084 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007085 if (((ent->checked == 0) ||
7086 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007087 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7088 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007089 unsigned long oldnbent = ctxt->nbentities;
7090
7091 /*
7092 * This is a bit hackish but this seems the best
7093 * way to make sure both SAX and DOM entity support
7094 * behaves okay.
7095 */
7096 void *user_data;
7097 if (ctxt->userData == ctxt)
7098 user_data = NULL;
7099 else
7100 user_data = ctxt->userData;
7101
7102 /*
7103 * Check that this entity is well formed
7104 * 4.3.2: An internal general parsed entity is well-formed
7105 * if its replacement text matches the production labeled
7106 * content.
7107 */
7108 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7109 ctxt->depth++;
7110 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7111 user_data, &list);
7112 ctxt->depth--;
7113
7114 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7115 ctxt->depth++;
7116 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7117 user_data, ctxt->depth, ent->URI,
7118 ent->ExternalID, &list);
7119 ctxt->depth--;
7120 } else {
7121 ret = XML_ERR_ENTITY_PE_INTERNAL;
7122 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7123 "invalid entity type found\n", NULL);
7124 }
7125
7126 /*
7127 * Store the number of entities needing parsing for this entity
7128 * content and do checkings
7129 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007130 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7131 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7132 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007133 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007134 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007135 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007136 return;
7137 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007138 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007139 xmlFreeNodeList(list);
7140 return;
7141 }
Owen Taylor3473f882001-02-23 17:55:21 +00007142
Daniel Veillard0161e632008-08-28 15:36:32 +00007143 if ((ret == XML_ERR_OK) && (list != NULL)) {
7144 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7145 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7146 (ent->children == NULL)) {
7147 ent->children = list;
7148 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007149 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007150 * Prune it directly in the generated document
7151 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007152 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007153 if (((list->type == XML_TEXT_NODE) &&
7154 (list->next == NULL)) ||
7155 (ctxt->parseMode == XML_PARSE_READER)) {
7156 list->parent = (xmlNodePtr) ent;
7157 list = NULL;
7158 ent->owner = 1;
7159 } else {
7160 ent->owner = 0;
7161 while (list != NULL) {
7162 list->parent = (xmlNodePtr) ctxt->node;
7163 list->doc = ctxt->myDoc;
7164 if (list->next == NULL)
7165 ent->last = list;
7166 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007167 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007168 list = ent->children;
7169#ifdef LIBXML_LEGACY_ENABLED
7170 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7171 xmlAddEntityReference(ent, list, NULL);
7172#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007173 }
7174 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007175 ent->owner = 1;
7176 while (list != NULL) {
7177 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007178 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007179 if (list->next == NULL)
7180 ent->last = list;
7181 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007182 }
7183 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007184 } else {
7185 xmlFreeNodeList(list);
7186 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007187 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007188 } else if ((ret != XML_ERR_OK) &&
7189 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7190 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7191 "Entity '%s' failed to parse\n", ent->name);
Nick Wellnhofer60dded12018-01-22 15:04:58 +01007192 if (ent->content != NULL)
7193 ent->content[0] = 0;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007194 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007195 } else if (list != NULL) {
7196 xmlFreeNodeList(list);
7197 list = NULL;
7198 }
7199 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007200 ent->checked = 2;
David Kilzer3f0627a2017-06-16 21:30:42 +02007201
7202 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7203 was_checked = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007204 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007205 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007206 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007207
Daniel Veillard0161e632008-08-28 15:36:32 +00007208 /*
7209 * Now that the entity content has been gathered
7210 * provide it to the application, this can take different forms based
7211 * on the parsing modes.
7212 */
7213 if (ent->children == NULL) {
7214 /*
7215 * Probably running in SAX mode and the callbacks don't
7216 * build the entity content. So unless we already went
7217 * though parsing for first checking go though the entity
7218 * content to generate callbacks associated to the entity
7219 */
7220 if (was_checked != 0) {
7221 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007222 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007223 * This is a bit hackish but this seems the best
7224 * way to make sure both SAX and DOM entity support
7225 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007226 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007227 if (ctxt->userData == ctxt)
7228 user_data = NULL;
7229 else
7230 user_data = ctxt->userData;
7231
7232 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7233 ctxt->depth++;
7234 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7235 ent->content, user_data, NULL);
7236 ctxt->depth--;
7237 } else if (ent->etype ==
7238 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7239 ctxt->depth++;
7240 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7241 ctxt->sax, user_data, ctxt->depth,
7242 ent->URI, ent->ExternalID, NULL);
7243 ctxt->depth--;
7244 } else {
7245 ret = XML_ERR_ENTITY_PE_INTERNAL;
7246 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7247 "invalid entity type found\n", NULL);
7248 }
7249 if (ret == XML_ERR_ENTITY_LOOP) {
7250 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7251 return;
7252 }
7253 }
7254 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7255 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7256 /*
7257 * Entity reference callback comes second, it's somewhat
7258 * superfluous but a compatibility to historical behaviour
7259 */
7260 ctxt->sax->reference(ctxt->userData, ent->name);
7261 }
7262 return;
7263 }
7264
7265 /*
7266 * If we didn't get any children for the entity being built
7267 */
7268 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7269 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7270 /*
7271 * Create a node.
7272 */
7273 ctxt->sax->reference(ctxt->userData, ent->name);
7274 return;
7275 }
7276
7277 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7278 /*
7279 * There is a problem on the handling of _private for entities
7280 * (bug 155816): Should we copy the content of the field from
7281 * the entity (possibly overwriting some value set by the user
7282 * when a copy is created), should we leave it alone, or should
7283 * we try to take care of different situations? The problem
7284 * is exacerbated by the usage of this field by the xmlReader.
7285 * To fix this bug, we look at _private on the created node
7286 * and, if it's NULL, we copy in whatever was in the entity.
7287 * If it's not NULL we leave it alone. This is somewhat of a
7288 * hack - maybe we should have further tests to determine
7289 * what to do.
7290 */
7291 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7292 /*
7293 * Seems we are generating the DOM content, do
7294 * a simple tree copy for all references except the first
7295 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007296 */
7297 if (((list == NULL) && (ent->owner == 0)) ||
7298 (ctxt->parseMode == XML_PARSE_READER)) {
7299 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7300
7301 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007302 * We are copying here, make sure there is no abuse
7303 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007304 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007305 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7306 return;
7307
7308 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007309 * when operating on a reader, the entities definitions
7310 * are always owning the entities subtree.
7311 if (ctxt->parseMode == XML_PARSE_READER)
7312 ent->owner = 1;
7313 */
7314
7315 cur = ent->children;
7316 while (cur != NULL) {
7317 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7318 if (nw != NULL) {
7319 if (nw->_private == NULL)
7320 nw->_private = cur->_private;
7321 if (firstChild == NULL){
7322 firstChild = nw;
7323 }
7324 nw = xmlAddChild(ctxt->node, nw);
7325 }
7326 if (cur == ent->last) {
7327 /*
7328 * needed to detect some strange empty
7329 * node cases in the reader tests
7330 */
7331 if ((ctxt->parseMode == XML_PARSE_READER) &&
7332 (nw != NULL) &&
7333 (nw->type == XML_ELEMENT_NODE) &&
7334 (nw->children == NULL))
7335 nw->extra = 1;
7336
7337 break;
7338 }
7339 cur = cur->next;
7340 }
7341#ifdef LIBXML_LEGACY_ENABLED
7342 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7343 xmlAddEntityReference(ent, firstChild, nw);
7344#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007345 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007346 xmlNodePtr nw = NULL, cur, next, last,
7347 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007348
7349 /*
7350 * We are copying here, make sure there is no abuse
7351 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007352 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007353 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7354 return;
7355
Daniel Veillard0161e632008-08-28 15:36:32 +00007356 /*
7357 * Copy the entity child list and make it the new
7358 * entity child list. The goal is to make sure any
7359 * ID or REF referenced will be the one from the
7360 * document content and not the entity copy.
7361 */
7362 cur = ent->children;
7363 ent->children = NULL;
7364 last = ent->last;
7365 ent->last = NULL;
7366 while (cur != NULL) {
7367 next = cur->next;
7368 cur->next = NULL;
7369 cur->parent = NULL;
7370 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7371 if (nw != NULL) {
7372 if (nw->_private == NULL)
7373 nw->_private = cur->_private;
7374 if (firstChild == NULL){
7375 firstChild = cur;
7376 }
7377 xmlAddChild((xmlNodePtr) ent, nw);
7378 xmlAddChild(ctxt->node, cur);
7379 }
7380 if (cur == last)
7381 break;
7382 cur = next;
7383 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007384 if (ent->owner == 0)
7385 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007386#ifdef LIBXML_LEGACY_ENABLED
7387 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7388 xmlAddEntityReference(ent, firstChild, nw);
7389#endif /* LIBXML_LEGACY_ENABLED */
7390 } else {
7391 const xmlChar *nbktext;
7392
7393 /*
7394 * the name change is to avoid coalescing of the
7395 * node with a possible previous text one which
7396 * would make ent->children a dangling pointer
7397 */
7398 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7399 -1);
7400 if (ent->children->type == XML_TEXT_NODE)
7401 ent->children->name = nbktext;
7402 if ((ent->last != ent->children) &&
7403 (ent->last->type == XML_TEXT_NODE))
7404 ent->last->name = nbktext;
7405 xmlAddChildList(ctxt->node, ent->children);
7406 }
7407
7408 /*
7409 * This is to avoid a nasty side effect, see
7410 * characters() in SAX.c
7411 */
7412 ctxt->nodemem = 0;
7413 ctxt->nodelen = 0;
7414 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007415 }
7416 }
7417}
7418
7419/**
7420 * xmlParseEntityRef:
7421 * @ctxt: an XML parser context
7422 *
7423 * parse ENTITY references declarations
7424 *
7425 * [68] EntityRef ::= '&' Name ';'
7426 *
7427 * [ WFC: Entity Declared ]
7428 * In a document without any DTD, a document with only an internal DTD
7429 * subset which contains no parameter entity references, or a document
7430 * with "standalone='yes'", the Name given in the entity reference
7431 * must match that in an entity declaration, except that well-formed
7432 * documents need not declare any of the following entities: amp, lt,
7433 * gt, apos, quot. The declaration of a parameter entity must precede
7434 * any reference to it. Similarly, the declaration of a general entity
7435 * must precede any reference to it which appears in a default value in an
7436 * attribute-list declaration. Note that if entities are declared in the
7437 * external subset or in external parameter entities, a non-validating
7438 * processor is not obligated to read and process their declarations;
7439 * for such documents, the rule that an entity must be declared is a
7440 * well-formedness constraint only if standalone='yes'.
7441 *
7442 * [ WFC: Parsed Entity ]
7443 * An entity reference must not contain the name of an unparsed entity
7444 *
7445 * Returns the xmlEntityPtr if found, or NULL otherwise.
7446 */
7447xmlEntityPtr
7448xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007449 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007450 xmlEntityPtr ent = NULL;
7451
7452 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007453 if (ctxt->instate == XML_PARSER_EOF)
7454 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007455
Daniel Veillard0161e632008-08-28 15:36:32 +00007456 if (RAW != '&')
7457 return(NULL);
7458 NEXT;
7459 name = xmlParseName(ctxt);
7460 if (name == NULL) {
7461 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7462 "xmlParseEntityRef: no name\n");
7463 return(NULL);
7464 }
7465 if (RAW != ';') {
7466 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7467 return(NULL);
7468 }
7469 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007470
Daniel Veillard0161e632008-08-28 15:36:32 +00007471 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007472 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007473 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007474 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7475 ent = xmlGetPredefinedEntity(name);
7476 if (ent != NULL)
7477 return(ent);
7478 }
Owen Taylor3473f882001-02-23 17:55:21 +00007479
Daniel Veillard0161e632008-08-28 15:36:32 +00007480 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007481 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007482 */
7483 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007484
Daniel Veillard0161e632008-08-28 15:36:32 +00007485 /*
7486 * Ask first SAX for entity resolution, otherwise try the
7487 * entities which may have stored in the parser context.
7488 */
7489 if (ctxt->sax != NULL) {
7490 if (ctxt->sax->getEntity != NULL)
7491 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007492 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007493 (ctxt->options & XML_PARSE_OLDSAX))
7494 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007495 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7496 (ctxt->userData==ctxt)) {
7497 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007498 }
7499 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007500 if (ctxt->instate == XML_PARSER_EOF)
7501 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007502 /*
7503 * [ WFC: Entity Declared ]
7504 * In a document without any DTD, a document with only an
7505 * internal DTD subset which contains no parameter entity
7506 * references, or a document with "standalone='yes'", the
7507 * Name given in the entity reference must match that in an
7508 * entity declaration, except that well-formed documents
7509 * need not declare any of the following entities: amp, lt,
7510 * gt, apos, quot.
7511 * The declaration of a parameter entity must precede any
7512 * reference to it.
7513 * Similarly, the declaration of a general entity must
7514 * precede any reference to it which appears in a default
7515 * value in an attribute-list declaration. Note that if
7516 * entities are declared in the external subset or in
7517 * external parameter entities, a non-validating processor
7518 * is not obligated to read and process their declarations;
7519 * for such documents, the rule that an entity must be
7520 * declared is a well-formedness constraint only if
7521 * standalone='yes'.
7522 */
7523 if (ent == NULL) {
7524 if ((ctxt->standalone == 1) ||
7525 ((ctxt->hasExternalSubset == 0) &&
7526 (ctxt->hasPErefs == 0))) {
7527 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7528 "Entity '%s' not defined\n", name);
7529 } else {
7530 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7531 "Entity '%s' not defined\n", name);
7532 if ((ctxt->inSubset == 0) &&
7533 (ctxt->sax != NULL) &&
7534 (ctxt->sax->reference != NULL)) {
7535 ctxt->sax->reference(ctxt->userData, name);
7536 }
7537 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007538 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007539 ctxt->valid = 0;
7540 }
7541
7542 /*
7543 * [ WFC: Parsed Entity ]
7544 * An entity reference must not contain the name of an
7545 * unparsed entity
7546 */
7547 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7548 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7549 "Entity reference to unparsed entity %s\n", name);
7550 }
7551
7552 /*
7553 * [ WFC: No External Entity References ]
7554 * Attribute values cannot contain direct or indirect
7555 * entity references to external entities.
7556 */
7557 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7558 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7559 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7560 "Attribute references external entity '%s'\n", name);
7561 }
7562 /*
7563 * [ WFC: No < in Attribute Values ]
7564 * The replacement text of any entity referred to directly or
7565 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007566 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007567 */
7568 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007569 (ent != NULL) &&
7570 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007571 if (((ent->checked & 1) || (ent->checked == 0)) &&
7572 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007573 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7574 "'<' in entity '%s' is not allowed in attributes values\n", name);
7575 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007576 }
7577
7578 /*
7579 * Internal check, no parameter entities here ...
7580 */
7581 else {
7582 switch (ent->etype) {
7583 case XML_INTERNAL_PARAMETER_ENTITY:
7584 case XML_EXTERNAL_PARAMETER_ENTITY:
7585 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7586 "Attempt to reference the parameter entity '%s'\n",
7587 name);
7588 break;
7589 default:
7590 break;
7591 }
7592 }
7593
7594 /*
7595 * [ WFC: No Recursion ]
7596 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007597 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007598 * Done somewhere else
7599 */
Owen Taylor3473f882001-02-23 17:55:21 +00007600 return(ent);
7601}
7602
7603/**
7604 * xmlParseStringEntityRef:
7605 * @ctxt: an XML parser context
7606 * @str: a pointer to an index in the string
7607 *
7608 * parse ENTITY references declarations, but this version parses it from
7609 * a string value.
7610 *
7611 * [68] EntityRef ::= '&' Name ';'
7612 *
7613 * [ WFC: Entity Declared ]
7614 * In a document without any DTD, a document with only an internal DTD
7615 * subset which contains no parameter entity references, or a document
7616 * with "standalone='yes'", the Name given in the entity reference
7617 * must match that in an entity declaration, except that well-formed
7618 * documents need not declare any of the following entities: amp, lt,
7619 * gt, apos, quot. The declaration of a parameter entity must precede
7620 * any reference to it. Similarly, the declaration of a general entity
7621 * must precede any reference to it which appears in a default value in an
7622 * attribute-list declaration. Note that if entities are declared in the
7623 * external subset or in external parameter entities, a non-validating
7624 * processor is not obligated to read and process their declarations;
7625 * for such documents, the rule that an entity must be declared is a
7626 * well-formedness constraint only if standalone='yes'.
7627 *
7628 * [ WFC: Parsed Entity ]
7629 * An entity reference must not contain the name of an unparsed entity
7630 *
7631 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7632 * is updated to the current location in the string.
7633 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007634static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007635xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7636 xmlChar *name;
7637 const xmlChar *ptr;
7638 xmlChar cur;
7639 xmlEntityPtr ent = NULL;
7640
7641 if ((str == NULL) || (*str == NULL))
7642 return(NULL);
7643 ptr = *str;
7644 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007645 if (cur != '&')
7646 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007647
Daniel Veillard0161e632008-08-28 15:36:32 +00007648 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007649 name = xmlParseStringName(ctxt, &ptr);
7650 if (name == NULL) {
7651 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7652 "xmlParseStringEntityRef: no name\n");
7653 *str = ptr;
7654 return(NULL);
7655 }
7656 if (*ptr != ';') {
7657 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007658 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007659 *str = ptr;
7660 return(NULL);
7661 }
7662 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007663
Owen Taylor3473f882001-02-23 17:55:21 +00007664
Daniel Veillard0161e632008-08-28 15:36:32 +00007665 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007666 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007667 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007668 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7669 ent = xmlGetPredefinedEntity(name);
7670 if (ent != NULL) {
7671 xmlFree(name);
7672 *str = ptr;
7673 return(ent);
7674 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007675 }
Owen Taylor3473f882001-02-23 17:55:21 +00007676
Daniel Veillard0161e632008-08-28 15:36:32 +00007677 /*
7678 * Increate the number of entity references parsed
7679 */
7680 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007681
Daniel Veillard0161e632008-08-28 15:36:32 +00007682 /*
7683 * Ask first SAX for entity resolution, otherwise try the
7684 * entities which may have stored in the parser context.
7685 */
7686 if (ctxt->sax != NULL) {
7687 if (ctxt->sax->getEntity != NULL)
7688 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007689 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7690 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007691 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7692 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007693 }
7694 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007695 if (ctxt->instate == XML_PARSER_EOF) {
7696 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007697 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007698 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007699
7700 /*
7701 * [ WFC: Entity Declared ]
7702 * In a document without any DTD, a document with only an
7703 * internal DTD subset which contains no parameter entity
7704 * references, or a document with "standalone='yes'", the
7705 * Name given in the entity reference must match that in an
7706 * entity declaration, except that well-formed documents
7707 * need not declare any of the following entities: amp, lt,
7708 * gt, apos, quot.
7709 * The declaration of a parameter entity must precede any
7710 * reference to it.
7711 * Similarly, the declaration of a general entity must
7712 * precede any reference to it which appears in a default
7713 * value in an attribute-list declaration. Note that if
7714 * entities are declared in the external subset or in
7715 * external parameter entities, a non-validating processor
7716 * is not obligated to read and process their declarations;
7717 * for such documents, the rule that an entity must be
7718 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007719 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007720 */
7721 if (ent == NULL) {
7722 if ((ctxt->standalone == 1) ||
7723 ((ctxt->hasExternalSubset == 0) &&
7724 (ctxt->hasPErefs == 0))) {
7725 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7726 "Entity '%s' not defined\n", name);
7727 } else {
7728 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7729 "Entity '%s' not defined\n",
7730 name);
7731 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007732 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007733 /* TODO ? check regressions ctxt->valid = 0; */
7734 }
7735
7736 /*
7737 * [ WFC: Parsed Entity ]
7738 * An entity reference must not contain the name of an
7739 * unparsed entity
7740 */
7741 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7742 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7743 "Entity reference to unparsed entity %s\n", name);
7744 }
7745
7746 /*
7747 * [ WFC: No External Entity References ]
7748 * Attribute values cannot contain direct or indirect
7749 * entity references to external entities.
7750 */
7751 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7752 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7753 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7754 "Attribute references external entity '%s'\n", name);
7755 }
7756 /*
7757 * [ WFC: No < in Attribute Values ]
7758 * The replacement text of any entity referred to directly or
7759 * indirectly in an attribute value (other than "&lt;") must
7760 * not contain a <.
7761 */
7762 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7763 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007764 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007765 (xmlStrchr(ent->content, '<'))) {
7766 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7767 "'<' in entity '%s' is not allowed in attributes values\n",
7768 name);
7769 }
7770
7771 /*
7772 * Internal check, no parameter entities here ...
7773 */
7774 else {
7775 switch (ent->etype) {
7776 case XML_INTERNAL_PARAMETER_ENTITY:
7777 case XML_EXTERNAL_PARAMETER_ENTITY:
7778 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7779 "Attempt to reference the parameter entity '%s'\n",
7780 name);
7781 break;
7782 default:
7783 break;
7784 }
7785 }
7786
7787 /*
7788 * [ WFC: No Recursion ]
7789 * A parsed entity must not contain a recursive reference
7790 * to itself, either directly or indirectly.
7791 * Done somewhere else
7792 */
7793
7794 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007795 *str = ptr;
7796 return(ent);
7797}
7798
7799/**
7800 * xmlParsePEReference:
7801 * @ctxt: an XML parser context
7802 *
7803 * parse PEReference declarations
7804 * The entity content is handled directly by pushing it's content as
7805 * a new input stream.
7806 *
7807 * [69] PEReference ::= '%' Name ';'
7808 *
7809 * [ WFC: No Recursion ]
7810 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007811 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007812 *
7813 * [ WFC: Entity Declared ]
7814 * In a document without any DTD, a document with only an internal DTD
7815 * subset which contains no parameter entity references, or a document
7816 * with "standalone='yes'", ... ... The declaration of a parameter
7817 * entity must precede any reference to it...
7818 *
7819 * [ VC: Entity Declared ]
7820 * In a document with an external subset or external parameter entities
7821 * with "standalone='no'", ... ... The declaration of a parameter entity
7822 * must precede any reference to it...
7823 *
7824 * [ WFC: In DTD ]
7825 * Parameter-entity references may only appear in the DTD.
7826 * NOTE: misleading but this is handled.
7827 */
7828void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007829xmlParsePEReference(xmlParserCtxtPtr ctxt)
7830{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007831 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007832 xmlEntityPtr entity = NULL;
7833 xmlParserInputPtr input;
7834
Daniel Veillard0161e632008-08-28 15:36:32 +00007835 if (RAW != '%')
7836 return;
7837 NEXT;
7838 name = xmlParseName(ctxt);
7839 if (name == NULL) {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007840 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
Daniel Veillard0161e632008-08-28 15:36:32 +00007841 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007842 }
Nick Wellnhofer03904152017-06-05 21:16:00 +02007843 if (xmlParserDebugEntities)
7844 xmlGenericError(xmlGenericErrorContext,
7845 "PEReference: %s\n", name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007846 if (RAW != ';') {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007847 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007848 return;
7849 }
7850
7851 NEXT;
7852
7853 /*
7854 * Increate the number of entity references parsed
7855 */
7856 ctxt->nbentities++;
7857
7858 /*
7859 * Request the entity from SAX
7860 */
7861 if ((ctxt->sax != NULL) &&
7862 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007863 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7864 if (ctxt->instate == XML_PARSER_EOF)
7865 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007866 if (entity == NULL) {
7867 /*
7868 * [ WFC: Entity Declared ]
7869 * In a document without any DTD, a document with only an
7870 * internal DTD subset which contains no parameter entity
7871 * references, or a document with "standalone='yes'", ...
7872 * ... The declaration of a parameter entity must precede
7873 * any reference to it...
7874 */
7875 if ((ctxt->standalone == 1) ||
7876 ((ctxt->hasExternalSubset == 0) &&
7877 (ctxt->hasPErefs == 0))) {
7878 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7879 "PEReference: %%%s; not found\n",
7880 name);
7881 } else {
7882 /*
7883 * [ VC: Entity Declared ]
7884 * In a document with an external subset or external
7885 * parameter entities with "standalone='no'", ...
7886 * ... The declaration of a parameter entity must
7887 * precede any reference to it...
7888 */
Nick Wellnhofer03904152017-06-05 21:16:00 +02007889 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7890 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7891 "PEReference: %%%s; not found\n",
7892 name, NULL);
7893 } else
7894 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7895 "PEReference: %%%s; not found\n",
7896 name, NULL);
7897 ctxt->valid = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007898 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007899 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007900 } else {
7901 /*
7902 * Internal checking in case the entity quest barfed
7903 */
7904 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7905 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7906 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7907 "Internal: %%%s; is not a parameter entity\n",
7908 name, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007909 } else {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007910 xmlChar start[4];
7911 xmlCharEncoding enc;
7912
Neel Mehta90ccb582017-04-07 17:43:02 +02007913 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7914 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7915 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7916 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7917 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7918 (ctxt->replaceEntities == 0) &&
7919 (ctxt->validate == 0))
7920 return;
7921
Daniel Veillard0161e632008-08-28 15:36:32 +00007922 input = xmlNewEntityInputStream(ctxt, entity);
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02007923 if (xmlPushInput(ctxt, input) < 0) {
7924 xmlFreeInputStream(input);
Daniel Veillard0161e632008-08-28 15:36:32 +00007925 return;
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02007926 }
Nick Wellnhofer46dc9892017-06-08 02:24:56 +02007927
7928 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7929 /*
7930 * Get the 4 first bytes and decode the charset
7931 * if enc != XML_CHAR_ENCODING_NONE
7932 * plug some encoding conversion routines.
7933 * Note that, since we may have some non-UTF8
7934 * encoding (like UTF16, bug 135229), the 'length'
7935 * is not known, but we can calculate based upon
7936 * the amount of data in the buffer.
7937 */
7938 GROW
7939 if (ctxt->instate == XML_PARSER_EOF)
7940 return;
7941 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7942 start[0] = RAW;
7943 start[1] = NXT(1);
7944 start[2] = NXT(2);
7945 start[3] = NXT(3);
7946 enc = xmlDetectCharEncoding(start, 4);
7947 if (enc != XML_CHAR_ENCODING_NONE) {
7948 xmlSwitchEncoding(ctxt, enc);
7949 }
7950 }
7951
7952 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7953 (IS_BLANK_CH(NXT(5)))) {
7954 xmlParseTextDecl(ctxt);
Nick Wellnhofer03904152017-06-05 21:16:00 +02007955 }
7956 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007957 }
7958 }
7959 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007960}
7961
7962/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007963 * xmlLoadEntityContent:
7964 * @ctxt: an XML parser context
7965 * @entity: an unloaded system entity
7966 *
7967 * Load the original content of the given system entity from the
7968 * ExternalID/SystemID given. This is to be used for Included in Literal
7969 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7970 *
7971 * Returns 0 in case of success and -1 in case of failure
7972 */
7973static int
7974xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7975 xmlParserInputPtr input;
7976 xmlBufferPtr buf;
7977 int l, c;
7978 int count = 0;
7979
7980 if ((ctxt == NULL) || (entity == NULL) ||
7981 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7982 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7983 (entity->content != NULL)) {
7984 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7985 "xmlLoadEntityContent parameter error");
7986 return(-1);
7987 }
7988
7989 if (xmlParserDebugEntities)
7990 xmlGenericError(xmlGenericErrorContext,
7991 "Reading %s entity content input\n", entity->name);
7992
7993 buf = xmlBufferCreate();
7994 if (buf == NULL) {
7995 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7996 "xmlLoadEntityContent parameter error");
7997 return(-1);
7998 }
7999
8000 input = xmlNewEntityInputStream(ctxt, entity);
8001 if (input == NULL) {
8002 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8003 "xmlLoadEntityContent input error");
8004 xmlBufferFree(buf);
8005 return(-1);
8006 }
8007
8008 /*
8009 * Push the entity as the current input, read char by char
8010 * saving to the buffer until the end of the entity or an error
8011 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008012 if (xmlPushInput(ctxt, input) < 0) {
8013 xmlBufferFree(buf);
8014 return(-1);
8015 }
8016
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008017 GROW;
8018 c = CUR_CHAR(l);
8019 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8020 (IS_CHAR(c))) {
8021 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008022 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008023 count = 0;
8024 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008025 if (ctxt->instate == XML_PARSER_EOF) {
8026 xmlBufferFree(buf);
8027 return(-1);
8028 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008029 }
8030 NEXTL(l);
8031 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008032 if (c == 0) {
8033 count = 0;
8034 GROW;
8035 if (ctxt->instate == XML_PARSER_EOF) {
8036 xmlBufferFree(buf);
8037 return(-1);
8038 }
8039 c = CUR_CHAR(l);
8040 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008041 }
8042
8043 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8044 xmlPopInput(ctxt);
8045 } else if (!IS_CHAR(c)) {
8046 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8047 "xmlLoadEntityContent: invalid char value %d\n",
8048 c);
8049 xmlBufferFree(buf);
8050 return(-1);
8051 }
8052 entity->content = buf->content;
8053 buf->content = NULL;
8054 xmlBufferFree(buf);
8055
8056 return(0);
8057}
8058
8059/**
Owen Taylor3473f882001-02-23 17:55:21 +00008060 * xmlParseStringPEReference:
8061 * @ctxt: an XML parser context
8062 * @str: a pointer to an index in the string
8063 *
8064 * parse PEReference declarations
8065 *
8066 * [69] PEReference ::= '%' Name ';'
8067 *
8068 * [ WFC: No Recursion ]
8069 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008070 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008071 *
8072 * [ WFC: Entity Declared ]
8073 * In a document without any DTD, a document with only an internal DTD
8074 * subset which contains no parameter entity references, or a document
8075 * with "standalone='yes'", ... ... The declaration of a parameter
8076 * entity must precede any reference to it...
8077 *
8078 * [ VC: Entity Declared ]
8079 * In a document with an external subset or external parameter entities
8080 * with "standalone='no'", ... ... The declaration of a parameter entity
8081 * must precede any reference to it...
8082 *
8083 * [ WFC: In DTD ]
8084 * Parameter-entity references may only appear in the DTD.
8085 * NOTE: misleading but this is handled.
8086 *
 * Returns the xmlEntityPtr of the parameter entity if found, or NULL
 * otherwise. str is updated to the current value of the index
8089 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008090static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008091xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8092 const xmlChar *ptr;
8093 xmlChar cur;
8094 xmlChar *name;
8095 xmlEntityPtr entity = NULL;
8096
8097 if ((str == NULL) || (*str == NULL)) return(NULL);
8098 ptr = *str;
8099 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008100 if (cur != '%')
8101 return(NULL);
8102 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008103 name = xmlParseStringName(ctxt, &ptr);
8104 if (name == NULL) {
8105 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8106 "xmlParseStringPEReference: no name\n");
8107 *str = ptr;
8108 return(NULL);
8109 }
8110 cur = *ptr;
8111 if (cur != ';') {
8112 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8113 xmlFree(name);
8114 *str = ptr;
8115 return(NULL);
8116 }
8117 ptr++;
8118
8119 /*
8120 * Increate the number of entity references parsed
8121 */
8122 ctxt->nbentities++;
8123
8124 /*
8125 * Request the entity from SAX
8126 */
8127 if ((ctxt->sax != NULL) &&
8128 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008129 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8130 if (ctxt->instate == XML_PARSER_EOF) {
8131 xmlFree(name);
Nick Wellnhoferfb2f5182017-06-10 17:06:16 +02008132 *str = ptr;
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008133 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008134 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008135 if (entity == NULL) {
8136 /*
8137 * [ WFC: Entity Declared ]
8138 * In a document without any DTD, a document with only an
8139 * internal DTD subset which contains no parameter entity
8140 * references, or a document with "standalone='yes'", ...
8141 * ... The declaration of a parameter entity must precede
8142 * any reference to it...
8143 */
8144 if ((ctxt->standalone == 1) ||
8145 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8146 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8147 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008148 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008149 /*
8150 * [ VC: Entity Declared ]
8151 * In a document with an external subset or external
8152 * parameter entities with "standalone='no'", ...
8153 * ... The declaration of a parameter entity must
8154 * precede any reference to it...
8155 */
8156 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8157 "PEReference: %%%s; not found\n",
8158 name, NULL);
8159 ctxt->valid = 0;
8160 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008161 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008162 } else {
8163 /*
8164 * Internal checking in case the entity quest barfed
8165 */
8166 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8167 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8168 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8169 "%%%s; is not a parameter entity\n",
8170 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008171 }
8172 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008173 ctxt->hasPErefs = 1;
8174 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008175 *str = ptr;
8176 return(entity);
8177}
8178
8179/**
8180 * xmlParseDocTypeDecl:
8181 * @ctxt: an XML parser context
8182 *
8183 * parse a DOCTYPE declaration
8184 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008185 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008186 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8187 *
8188 * [ VC: Root Element Type ]
8189 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008190 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008191 */
8192
8193void
8194xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008195 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008196 xmlChar *ExternalID = NULL;
8197 xmlChar *URI = NULL;
8198
8199 /*
8200 * We know that '<!DOCTYPE' has been detected.
8201 */
8202 SKIP(9);
8203
8204 SKIP_BLANKS;
8205
8206 /*
8207 * Parse the DOCTYPE name.
8208 */
8209 name = xmlParseName(ctxt);
8210 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008211 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8212 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008213 }
8214 ctxt->intSubName = name;
8215
8216 SKIP_BLANKS;
8217
8218 /*
8219 * Check for SystemID and ExternalID
8220 */
8221 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8222
8223 if ((URI != NULL) || (ExternalID != NULL)) {
8224 ctxt->hasExternalSubset = 1;
8225 }
8226 ctxt->extSubURI = URI;
8227 ctxt->extSubSystem = ExternalID;
8228
8229 SKIP_BLANKS;
8230
8231 /*
8232 * Create and update the internal subset.
8233 */
8234 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8235 (!ctxt->disableSAX))
8236 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008237 if (ctxt->instate == XML_PARSER_EOF)
8238 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008239
8240 /*
8241 * Is there any internal subset declarations ?
8242 * they are handled separately in xmlParseInternalSubset()
8243 */
8244 if (RAW == '[')
8245 return;
8246
8247 /*
8248 * We should be at the end of the DOCTYPE declaration.
8249 */
8250 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008251 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008252 }
8253 NEXT;
8254}
8255
8256/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008257 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008258 * @ctxt: an XML parser context
8259 *
8260 * parse the internal subset declaration
8261 *
8262 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8263 */
8264
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008265static void
Owen Taylor3473f882001-02-23 17:55:21 +00008266xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8267 /*
8268 * Is there any DTD definition ?
8269 */
8270 if (RAW == '[') {
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008271 int baseInputNr = ctxt->inputNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008272 ctxt->instate = XML_PARSER_DTD;
8273 NEXT;
8274 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008275 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008276 * PEReferences.
8277 * Subsequence (markupdecl | PEReference | S)*
8278 */
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008279 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
Nick Wellnhofer453dff12017-06-19 17:55:20 +02008280 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008281 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008282 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008283
8284 SKIP_BLANKS;
8285 xmlParseMarkupDecl(ctxt);
8286 xmlParsePEReference(ctxt);
8287
Owen Taylor3473f882001-02-23 17:55:21 +00008288 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008289 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008290 "xmlParseInternalSubset: error detected in Markup declaration\n");
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008291 if (ctxt->inputNr > baseInputNr)
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02008292 xmlPopInput(ctxt);
8293 else
8294 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008295 }
8296 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008297 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008298 NEXT;
8299 SKIP_BLANKS;
8300 }
8301 }
8302
8303 /*
8304 * We should be at the end of the DOCTYPE declaration.
8305 */
8306 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008307 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Daniel Veillarda7a94612016-02-09 12:55:29 +01008308 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008309 }
8310 NEXT;
8311}
8312
Daniel Veillard81273902003-09-30 00:43:48 +00008313#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008314/**
8315 * xmlParseAttribute:
8316 * @ctxt: an XML parser context
8317 * @value: a xmlChar ** used to store the value of the attribute
8318 *
8319 * parse an attribute
8320 *
8321 * [41] Attribute ::= Name Eq AttValue
8322 *
8323 * [ WFC: No External Entity References ]
8324 * Attribute values cannot contain direct or indirect entity references
8325 * to external entities.
8326 *
8327 * [ WFC: No < in Attribute Values ]
8328 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008329 * an attribute value (other than "&lt;") must not contain a <.
8330 *
Owen Taylor3473f882001-02-23 17:55:21 +00008331 * [ VC: Attribute Value Type ]
8332 * The attribute must have been declared; the value must be of the type
8333 * declared for it.
8334 *
8335 * [25] Eq ::= S? '=' S?
8336 *
8337 * With namespace:
8338 *
8339 * [NS 11] Attribute ::= QName Eq AttValue
8340 *
8341 * Also the case QName == xmlns:??? is handled independently as a namespace
8342 * definition.
8343 *
8344 * Returns the attribute name, and the value in *value.
8345 */
8346
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008347const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008348xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008349 const xmlChar *name;
8350 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008351
8352 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008353 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008354 name = xmlParseName(ctxt);
8355 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008356 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008357 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008358 return(NULL);
8359 }
8360
8361 /*
8362 * read the value
8363 */
8364 SKIP_BLANKS;
8365 if (RAW == '=') {
8366 NEXT;
8367 SKIP_BLANKS;
8368 val = xmlParseAttValue(ctxt);
8369 ctxt->instate = XML_PARSER_CONTENT;
8370 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008371 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02008372 "Specification mandates value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008373 return(NULL);
8374 }
8375
8376 /*
8377 * Check that xml:lang conforms to the specification
8378 * No more registered as an error, just generate a warning now
8379 * since this was deprecated in XML second edition
8380 */
8381 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8382 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008383 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8384 "Malformed value for xml:lang : %s\n",
8385 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008386 }
8387 }
8388
8389 /*
8390 * Check that xml:space conforms to the specification
8391 */
8392 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8393 if (xmlStrEqual(val, BAD_CAST "default"))
8394 *(ctxt->space) = 0;
8395 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8396 *(ctxt->space) = 1;
8397 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008398 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008399"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008400 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008401 }
8402 }
8403
8404 *value = val;
8405 return(name);
8406}
8407
8408/**
8409 * xmlParseStartTag:
8410 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008411 *
Owen Taylor3473f882001-02-23 17:55:21 +00008412 * parse a start of tag either for rule element or
8413 * EmptyElement. In both case we don't parse the tag closing chars.
8414 *
8415 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8416 *
8417 * [ WFC: Unique Att Spec ]
8418 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008419 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008420 *
8421 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8422 *
8423 * [ WFC: Unique Att Spec ]
8424 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008425 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008426 *
8427 * With namespace:
8428 *
8429 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8430 *
8431 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8432 *
8433 * Returns the element name parsed
8434 */
8435
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008436const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008437xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008438 const xmlChar *name;
8439 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008440 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008441 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008442 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008443 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008444 int i;
8445
8446 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008447 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008448
8449 name = xmlParseName(ctxt);
8450 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008451 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008452 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008453 return(NULL);
8454 }
8455
8456 /*
8457 * Now parse the attributes, it ends up with the ending
8458 *
8459 * (S Attribute)* S?
8460 */
8461 SKIP_BLANKS;
8462 GROW;
8463
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008464 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008465 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008466 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008467 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008468 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008469
8470 attname = xmlParseAttribute(ctxt, &attvalue);
8471 if ((attname != NULL) && (attvalue != NULL)) {
8472 /*
8473 * [ WFC: Unique Att Spec ]
8474 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008475 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008476 */
8477 for (i = 0; i < nbatts;i += 2) {
8478 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008479 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008480 xmlFree(attvalue);
8481 goto failed;
8482 }
8483 }
Owen Taylor3473f882001-02-23 17:55:21 +00008484 /*
8485 * Add the pair to atts
8486 */
8487 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008488 maxatts = 22; /* allow for 10 attrs by default */
8489 atts = (const xmlChar **)
8490 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008491 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008492 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008493 if (attvalue != NULL)
8494 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008495 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008496 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008497 ctxt->atts = atts;
8498 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008499 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008500 const xmlChar **n;
8501
Owen Taylor3473f882001-02-23 17:55:21 +00008502 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008503 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008504 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008505 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008506 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008507 if (attvalue != NULL)
8508 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008509 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008510 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008511 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008512 ctxt->atts = atts;
8513 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008514 }
8515 atts[nbatts++] = attname;
8516 atts[nbatts++] = attvalue;
8517 atts[nbatts] = NULL;
8518 atts[nbatts + 1] = NULL;
8519 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008520 if (attvalue != NULL)
8521 xmlFree(attvalue);
8522 }
8523
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008524failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008525
Daniel Veillard3772de32002-12-17 10:31:45 +00008526 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008527 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8528 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02008529 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8531 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008532 }
Daniel Veillard02111c12003-02-24 19:14:52 +00008533 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8534 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008535 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8536 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008537 break;
8538 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008539 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008540 GROW;
8541 }
8542
8543 /*
8544 * SAX: Start of Element !
8545 */
8546 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008547 (!ctxt->disableSAX)) {
8548 if (nbatts > 0)
8549 ctxt->sax->startElement(ctxt->userData, name, atts);
8550 else
8551 ctxt->sax->startElement(ctxt->userData, name, NULL);
8552 }
Owen Taylor3473f882001-02-23 17:55:21 +00008553
8554 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008555 /* Free only the content strings */
8556 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008557 if (atts[i] != NULL)
8558 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008559 }
8560 return(name);
8561}
8562
8563/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008564 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008565 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008566 * @line: line of the start tag
8567 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008568 *
8569 * parse an end of tag
8570 *
8571 * [42] ETag ::= '</' Name S? '>'
8572 *
8573 * With namespace
8574 *
8575 * [NS 9] ETag ::= '</' QName S? '>'
8576 */
8577
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008578static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008579xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008580 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008581
8582 GROW;
8583 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008584 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008585 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008586 return;
8587 }
8588 SKIP(2);
8589
Daniel Veillard46de64e2002-05-29 08:21:33 +00008590 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008591
8592 /*
8593 * We should definitely be at the ending "S? '>'" part
8594 */
8595 GROW;
8596 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008597 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008598 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008599 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008600 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008601
8602 /*
8603 * [ WFC: Element Type Match ]
8604 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008605 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008606 *
8607 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008608 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008609 if (name == NULL) name = BAD_CAST "unparseable";
8610 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008611 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008612 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008613 }
8614
8615 /*
8616 * SAX: End of Tag
8617 */
8618 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8619 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008620 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008621
Daniel Veillarde57ec792003-09-10 10:50:59 +00008622 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008623 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008624 return;
8625}
8626
8627/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008628 * xmlParseEndTag:
8629 * @ctxt: an XML parser context
8630 *
8631 * parse an end of tag
8632 *
8633 * [42] ETag ::= '</' Name S? '>'
8634 *
8635 * With namespace
8636 *
8637 * [NS 9] ETag ::= '</' QName S? '>'
8638 */
8639
8640void
8641xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008642 xmlParseEndTag1(ctxt, 0);
8643}
Daniel Veillard81273902003-09-30 00:43:48 +00008644#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008645
8646/************************************************************************
8647 * *
8648 * SAX 2 specific operations *
8649 * *
8650 ************************************************************************/
8651
Daniel Veillard0fb18932003-09-07 09:14:37 +00008652/*
8653 * xmlGetNamespace:
8654 * @ctxt: an XML parser context
8655 * @prefix: the prefix to lookup
8656 *
8657 * Lookup the namespace name for the @prefix (which ca be NULL)
Jan Pokornýbb654fe2016-04-13 16:56:07 +02008658 * The prefix must come from the @ctxt->dict dictionary
Daniel Veillard0fb18932003-09-07 09:14:37 +00008659 *
8660 * Returns the namespace name or NULL if not bound
8661 */
8662static const xmlChar *
8663xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8664 int i;
8665
Daniel Veillarde57ec792003-09-10 10:50:59 +00008666 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008667 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008668 if (ctxt->nsTab[i] == prefix) {
8669 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8670 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008671 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008672 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008673 return(NULL);
8674}
8675
8676/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008677 * xmlParseQName:
8678 * @ctxt: an XML parser context
8679 * @prefix: pointer to store the prefix part
8680 *
8681 * parse an XML Namespace QName
8682 *
8683 * [6] QName ::= (Prefix ':')? LocalPart
8684 * [7] Prefix ::= NCName
8685 * [8] LocalPart ::= NCName
8686 *
8687 * Returns the Name parsed or NULL
8688 */
8689
8690static const xmlChar *
8691xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8692 const xmlChar *l, *p;
8693
8694 GROW;
8695
8696 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008697 if (l == NULL) {
8698 if (CUR == ':') {
8699 l = xmlParseName(ctxt);
8700 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008701 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008702 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008703 *prefix = NULL;
8704 return(l);
8705 }
8706 }
8707 return(NULL);
8708 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008709 if (CUR == ':') {
8710 NEXT;
8711 p = l;
8712 l = xmlParseNCName(ctxt);
8713 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008714 xmlChar *tmp;
8715
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008716 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8717 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008718 l = xmlParseNmtoken(ctxt);
8719 if (l == NULL)
8720 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8721 else {
8722 tmp = xmlBuildQName(l, p, NULL, 0);
8723 xmlFree((char *)l);
8724 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008725 p = xmlDictLookup(ctxt->dict, tmp, -1);
8726 if (tmp != NULL) xmlFree(tmp);
8727 *prefix = NULL;
8728 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008729 }
8730 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008731 xmlChar *tmp;
8732
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008733 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8734 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008735 NEXT;
8736 tmp = (xmlChar *) xmlParseName(ctxt);
8737 if (tmp != NULL) {
8738 tmp = xmlBuildQName(tmp, l, NULL, 0);
8739 l = xmlDictLookup(ctxt->dict, tmp, -1);
8740 if (tmp != NULL) xmlFree(tmp);
8741 *prefix = p;
8742 return(l);
8743 }
8744 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8745 l = xmlDictLookup(ctxt->dict, tmp, -1);
8746 if (tmp != NULL) xmlFree(tmp);
8747 *prefix = p;
8748 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008749 }
8750 *prefix = p;
8751 } else
8752 *prefix = NULL;
8753 return(l);
8754}
8755
8756/**
8757 * xmlParseQNameAndCompare:
8758 * @ctxt: an XML parser context
8759 * @name: the localname
8760 * @prefix: the prefix, if any.
8761 *
8762 * parse an XML name and compares for match
8763 * (specialized for endtag parsing)
8764 *
8765 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8766 * and the name for mismatch
8767 */
8768
8769static const xmlChar *
8770xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8771 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008772 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008773 const xmlChar *in;
8774 const xmlChar *ret;
8775 const xmlChar *prefix2;
8776
8777 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8778
8779 GROW;
8780 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008781
Daniel Veillard0fb18932003-09-07 09:14:37 +00008782 cmp = prefix;
8783 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008784 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008785 ++cmp;
8786 }
8787 if ((*cmp == 0) && (*in == ':')) {
8788 in++;
8789 cmp = name;
8790 while (*in != 0 && *in == *cmp) {
8791 ++in;
8792 ++cmp;
8793 }
William M. Brack76e95df2003-10-18 16:20:14 +00008794 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008795 /* success */
8796 ctxt->input->cur = in;
8797 return((const xmlChar*) 1);
8798 }
8799 }
8800 /*
8801 * all strings coms from the dictionary, equality can be done directly
8802 */
8803 ret = xmlParseQName (ctxt, &prefix2);
8804 if ((ret == name) && (prefix == prefix2))
8805 return((const xmlChar*) 1);
8806 return ret;
8807}
8808
8809/**
8810 * xmlParseAttValueInternal:
8811 * @ctxt: an XML parser context
8812 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008813 * @alloc: whether the attribute was reallocated as a new string
8814 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008815 *
8816 * parse a value for an attribute.
8817 * NOTE: if no normalization is needed, the routine will return pointers
8818 * directly from the data buffer.
8819 *
8820 * 3.3.3 Attribute-Value Normalization:
8821 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008822 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008823 * - a character reference is processed by appending the referenced
8824 * character to the attribute value
8825 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008826 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008827 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8828 * appending #x20 to the normalized value, except that only a single
8829 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008830 * parsed entity or the literal entity value of an internal parsed entity
8831 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008832 * If the declared value is not CDATA, then the XML processor must further
8833 * process the normalized attribute value by discarding any leading and
8834 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008835 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008836 * All attributes for which no declaration has been read should be treated
8837 * by a non-validating parser as if declared CDATA.
8838 *
8839 * Returns the AttValue parsed or NULL. The value has to be freed by the
8840 * caller if it was copied, this can be detected by val[*len] == 0.
8841 */
8842
8843static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008844xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8845 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008846{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008847 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008848 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008849 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08008850 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008851
8852 GROW;
8853 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08008854 line = ctxt->input->line;
8855 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008856 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008857 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008858 return (NULL);
8859 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008860 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008861
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008862 /*
8863 * try to handle in this routine the most common case where no
8864 * allocation of a new string is required and where content is
8865 * pure ASCII.
8866 */
8867 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008868 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008869 end = ctxt->input->end;
8870 start = in;
8871 if (in >= end) {
8872 const xmlChar *oldbase = ctxt->input->base;
8873 GROW;
8874 if (oldbase != ctxt->input->base) {
8875 long delta = ctxt->input->base - oldbase;
8876 start = start + delta;
8877 in = in + delta;
8878 }
8879 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008880 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008881 if (normalize) {
8882 /*
8883 * Skip any leading spaces
8884 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008885 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008886 ((*in == 0x20) || (*in == 0x9) ||
8887 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008888 if (*in == 0xA) {
8889 line++; col = 1;
8890 } else {
8891 col++;
8892 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008893 in++;
8894 start = in;
8895 if (in >= end) {
8896 const xmlChar *oldbase = ctxt->input->base;
8897 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008898 if (ctxt->instate == XML_PARSER_EOF)
8899 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008900 if (oldbase != ctxt->input->base) {
8901 long delta = ctxt->input->base - oldbase;
8902 start = start + delta;
8903 in = in + delta;
8904 }
8905 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008906 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8907 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8908 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008909 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008910 return(NULL);
8911 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008912 }
8913 }
8914 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8915 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008916 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008917 if ((*in++ == 0x20) && (*in == 0x20)) break;
8918 if (in >= end) {
8919 const xmlChar *oldbase = ctxt->input->base;
8920 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008921 if (ctxt->instate == XML_PARSER_EOF)
8922 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008923 if (oldbase != ctxt->input->base) {
8924 long delta = ctxt->input->base - oldbase;
8925 start = start + delta;
8926 in = in + delta;
8927 }
8928 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008929 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8930 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8931 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008932 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008933 return(NULL);
8934 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008935 }
8936 }
8937 last = in;
8938 /*
8939 * skip the trailing blanks
8940 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008941 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008942 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008943 ((*in == 0x20) || (*in == 0x9) ||
8944 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008945 if (*in == 0xA) {
8946 line++, col = 1;
8947 } else {
8948 col++;
8949 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008950 in++;
8951 if (in >= end) {
8952 const xmlChar *oldbase = ctxt->input->base;
8953 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008954 if (ctxt->instate == XML_PARSER_EOF)
8955 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008956 if (oldbase != ctxt->input->base) {
8957 long delta = ctxt->input->base - oldbase;
8958 start = start + delta;
8959 in = in + delta;
8960 last = last + delta;
8961 }
8962 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008963 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8964 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8965 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008966 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008967 return(NULL);
8968 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008969 }
8970 }
Daniel Veillarde17db992012-07-19 11:25:16 +08008971 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8972 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8973 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008974 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008975 return(NULL);
8976 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008977 if (*in != limit) goto need_complex;
8978 } else {
8979 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8980 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8981 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008982 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008983 if (in >= end) {
8984 const xmlChar *oldbase = ctxt->input->base;
8985 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008986 if (ctxt->instate == XML_PARSER_EOF)
8987 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008988 if (oldbase != ctxt->input->base) {
8989 long delta = ctxt->input->base - oldbase;
8990 start = start + delta;
8991 in = in + delta;
8992 }
8993 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008994 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8995 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8996 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008997 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008998 return(NULL);
8999 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009000 }
9001 }
9002 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009003 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9004 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9005 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009006 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009007 return(NULL);
9008 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009009 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009010 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009011 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009012 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009013 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009014 *len = last - start;
9015 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009016 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009017 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009018 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009019 }
9020 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009021 ctxt->input->line = line;
9022 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009023 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009024 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009025need_complex:
9026 if (alloc) *alloc = 1;
9027 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009028}
9029
9030/**
9031 * xmlParseAttribute2:
9032 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009033 * @pref: the element prefix
9034 * @elem: the element name
9035 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009036 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009037 * @len: an int * to save the length of the attribute
9038 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009039 *
9040 * parse an attribute in the new SAX2 framework.
9041 *
9042 * Returns the attribute name, and the value in *value, .
9043 */
9044
9045static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009046xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009047 const xmlChar * pref, const xmlChar * elem,
9048 const xmlChar ** prefix, xmlChar ** value,
9049 int *len, int *alloc)
9050{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009051 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009052 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009053 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009054
9055 *value = NULL;
9056 GROW;
9057 name = xmlParseQName(ctxt, prefix);
9058 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009059 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9060 "error parsing attribute name\n");
9061 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009062 }
9063
9064 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009065 * get the type if needed
9066 */
9067 if (ctxt->attsSpecial != NULL) {
9068 int type;
9069
Nick Wellnhoferd422b952017-10-09 13:37:42 +02009070 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9071 pref, elem, *prefix, name);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009072 if (type != 0)
9073 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009074 }
9075
9076 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009077 * read the value
9078 */
9079 SKIP_BLANKS;
9080 if (RAW == '=') {
9081 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009082 SKIP_BLANKS;
9083 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9084 if (normalize) {
9085 /*
9086 * Sometimes a second normalisation pass for spaces is needed
9087 * but that only happens if charrefs or entities refernces
9088 * have been used in the attribute value, i.e. the attribute
9089 * value have been extracted in an allocated string already.
9090 */
9091 if (*alloc) {
9092 const xmlChar *val2;
9093
9094 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009095 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009096 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009097 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009098 }
9099 }
9100 }
9101 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009102 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009103 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02009104 "Specification mandates value for attribute %s\n",
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009105 name);
9106 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009107 }
9108
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009109 if (*prefix == ctxt->str_xml) {
9110 /*
9111 * Check that xml:lang conforms to the specification
9112 * No more registered as an error, just generate a warning now
9113 * since this was deprecated in XML second edition
9114 */
9115 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9116 internal_val = xmlStrndup(val, *len);
9117 if (!xmlCheckLanguageID(internal_val)) {
9118 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9119 "Malformed value for xml:lang : %s\n",
9120 internal_val, NULL);
9121 }
9122 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009123
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009124 /*
9125 * Check that xml:space conforms to the specification
9126 */
9127 if (xmlStrEqual(name, BAD_CAST "space")) {
9128 internal_val = xmlStrndup(val, *len);
9129 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9130 *(ctxt->space) = 0;
9131 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9132 *(ctxt->space) = 1;
9133 else {
9134 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9135 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9136 internal_val, NULL);
9137 }
9138 }
9139 if (internal_val) {
9140 xmlFree(internal_val);
9141 }
9142 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009143
9144 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009145 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009146}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009147/**
9148 * xmlParseStartTag2:
9149 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009150 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009151 * parse a start of tag either for rule element or
9152 * EmptyElement. In both case we don't parse the tag closing chars.
9153 * This routine is called when running SAX2 parsing
9154 *
9155 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9156 *
9157 * [ WFC: Unique Att Spec ]
9158 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009159 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009160 *
9161 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9162 *
9163 * [ WFC: Unique Att Spec ]
9164 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009165 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009166 *
9167 * With namespace:
9168 *
9169 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9170 *
9171 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9172 *
9173 * Returns the element name parsed
9174 */
9175
9176static const xmlChar *
9177xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009178 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009179 const xmlChar *localname;
9180 const xmlChar *prefix;
9181 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009182 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009183 const xmlChar *nsname;
9184 xmlChar *attvalue;
9185 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009186 int maxatts = ctxt->maxatts;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009187 int nratts, nbatts, nbdef, inputid;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009188 int i, j, nbNs, attval;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009189 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009190 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009191
9192 if (RAW != '<') return(NULL);
9193 NEXT1;
9194
9195 /*
9196 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9197 * point since the attribute values may be stored as pointers to
9198 * the buffer and calling SHRINK would destroy them !
9199 * The Shrinking is only possible once the full set of attribute
9200 * callbacks have been done.
9201 */
9202 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009203 cur = ctxt->input->cur - ctxt->input->base;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009204 inputid = ctxt->input->id;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009205 nbatts = 0;
9206 nratts = 0;
9207 nbdef = 0;
9208 nbNs = 0;
9209 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009210 /* Forget any namespaces added during an earlier parse of this element. */
9211 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009212
9213 localname = xmlParseQName(ctxt, &prefix);
9214 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009215 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9216 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009217 return(NULL);
9218 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009219 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009220
9221 /*
9222 * Now parse the attributes, it ends up with the ending
9223 *
9224 * (S Attribute)* S?
9225 */
9226 SKIP_BLANKS;
9227 GROW;
9228
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009229 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009230 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009231 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009232 const xmlChar *q = CUR_PTR;
9233 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009234 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009235
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009236 attname = xmlParseAttribute2(ctxt, prefix, localname,
9237 &aprefix, &attvalue, &len, &alloc);
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009238 if ((attname == NULL) || (attvalue == NULL))
9239 goto next_attr;
9240 if (len < 0) len = xmlStrlen(attvalue);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009241
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009242 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9243 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9244 xmlURIPtr uri;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009245
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009246 if (URL == NULL) {
9247 xmlErrMemory(ctxt, "dictionary allocation failure");
9248 if ((attvalue != NULL) && (alloc != 0))
9249 xmlFree(attvalue);
9250 return(NULL);
9251 }
9252 if (*URL != 0) {
9253 uri = xmlParseURI((const char *) URL);
9254 if (uri == NULL) {
9255 xmlNsErr(ctxt, XML_WAR_NS_URI,
9256 "xmlns: '%s' is not a valid URI\n",
9257 URL, NULL, NULL);
9258 } else {
9259 if (uri->scheme == NULL) {
9260 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9261 "xmlns: URI %s is not absolute\n",
9262 URL, NULL, NULL);
9263 }
9264 xmlFreeURI(uri);
9265 }
Daniel Veillard37334572008-07-31 08:20:02 +00009266 if (URL == ctxt->str_xml_ns) {
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009267 if (attname != ctxt->str_xml) {
9268 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9269 "xml namespace URI cannot be the default namespace\n",
9270 NULL, NULL, NULL);
9271 }
9272 goto next_attr;
9273 }
9274 if ((len == 29) &&
9275 (xmlStrEqual(URL,
9276 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9277 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9278 "reuse of the xmlns namespace name is forbidden\n",
9279 NULL, NULL, NULL);
9280 goto next_attr;
9281 }
9282 }
9283 /*
9284 * check that it's not a defined namespace
9285 */
9286 for (j = 1;j <= nbNs;j++)
9287 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9288 break;
9289 if (j <= nbNs)
9290 xmlErrAttributeDup(ctxt, NULL, attname);
9291 else
9292 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009293
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009294 } else if (aprefix == ctxt->str_xmlns) {
9295 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9296 xmlURIPtr uri;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009297
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009298 if (attname == ctxt->str_xml) {
9299 if (URL != ctxt->str_xml_ns) {
9300 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9301 "xml namespace prefix mapped to wrong URI\n",
9302 NULL, NULL, NULL);
9303 }
9304 /*
9305 * Do not keep a namespace definition node
9306 */
9307 goto next_attr;
9308 }
9309 if (URL == ctxt->str_xml_ns) {
9310 if (attname != ctxt->str_xml) {
9311 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9312 "xml namespace URI mapped to wrong prefix\n",
9313 NULL, NULL, NULL);
9314 }
9315 goto next_attr;
9316 }
9317 if (attname == ctxt->str_xmlns) {
9318 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9319 "redefinition of the xmlns prefix is forbidden\n",
9320 NULL, NULL, NULL);
9321 goto next_attr;
9322 }
9323 if ((len == 29) &&
9324 (xmlStrEqual(URL,
9325 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9326 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9327 "reuse of the xmlns namespace name is forbidden\n",
9328 NULL, NULL, NULL);
9329 goto next_attr;
9330 }
9331 if ((URL == NULL) || (URL[0] == 0)) {
9332 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9333 "xmlns:%s: Empty XML namespace is not allowed\n",
9334 attname, NULL, NULL);
9335 goto next_attr;
9336 } else {
9337 uri = xmlParseURI((const char *) URL);
9338 if (uri == NULL) {
9339 xmlNsErr(ctxt, XML_WAR_NS_URI,
9340 "xmlns:%s: '%s' is not a valid URI\n",
9341 attname, URL, NULL);
9342 } else {
9343 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9344 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9345 "xmlns:%s: URI %s is not absolute\n",
9346 attname, URL, NULL);
9347 }
9348 xmlFreeURI(uri);
9349 }
9350 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009351
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009352 /*
9353 * check that it's not a defined namespace
9354 */
9355 for (j = 1;j <= nbNs;j++)
9356 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9357 break;
9358 if (j <= nbNs)
9359 xmlErrAttributeDup(ctxt, aprefix, attname);
9360 else
9361 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9362
9363 } else {
9364 /*
9365 * Add the pair to atts
9366 */
9367 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9368 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9369 goto next_attr;
9370 }
9371 maxatts = ctxt->maxatts;
9372 atts = ctxt->atts;
9373 }
9374 ctxt->attallocs[nratts++] = alloc;
9375 atts[nbatts++] = attname;
9376 atts[nbatts++] = aprefix;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009377 /*
9378 * The namespace URI field is used temporarily to point at the
9379 * base of the current input buffer for non-alloced attributes.
9380 * When the input buffer is reallocated, all the pointers become
9381 * invalid, but they can be reconstructed later.
9382 */
9383 if (alloc)
9384 atts[nbatts++] = NULL;
9385 else
9386 atts[nbatts++] = ctxt->input->base;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009387 atts[nbatts++] = attvalue;
9388 attvalue += len;
9389 atts[nbatts++] = attvalue;
9390 /*
9391 * tag if some deallocation is needed
9392 */
9393 if (alloc != 0) attval = 1;
9394 attvalue = NULL; /* moved into atts */
9395 }
9396
9397next_attr:
9398 if ((attvalue != NULL) && (alloc != 0)) {
9399 xmlFree(attvalue);
9400 attvalue = NULL;
9401 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009402
9403 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009404 if (ctxt->instate == XML_PARSER_EOF)
9405 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009406 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9407 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02009408 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009409 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9410 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009411 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009412 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009413 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9414 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009415 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009416 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009417 break;
9418 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009419 GROW;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009420 }
9421
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009422 if (ctxt->input->id != inputid) {
9423 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9424 "Unexpected change of input\n");
9425 localname = NULL;
9426 goto done;
9427 }
9428
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009429 /* Reconstruct attribute value pointers. */
9430 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9431 if (atts[i+2] != NULL) {
9432 /*
9433 * Arithmetic on dangling pointers is technically undefined
9434 * behavior, but well...
9435 */
9436 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9437 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9438 atts[i+3] += offset; /* value */
9439 atts[i+4] += offset; /* valuend */
9440 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009441 }
9442
Daniel Veillard0fb18932003-09-07 09:14:37 +00009443 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009444 * The attributes defaulting
9445 */
9446 if (ctxt->attsDefault != NULL) {
9447 xmlDefAttrsPtr defaults;
9448
9449 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9450 if (defaults != NULL) {
9451 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009452 attname = defaults->values[5 * i];
9453 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009454
9455 /*
9456 * special work for namespaces defaulted defs
9457 */
9458 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9459 /*
9460 * check that it's not a defined namespace
9461 */
9462 for (j = 1;j <= nbNs;j++)
9463 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9464 break;
9465 if (j <= nbNs) continue;
9466
9467 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009468 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009469 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009470 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009471 nbNs++;
9472 }
9473 } else if (aprefix == ctxt->str_xmlns) {
9474 /*
9475 * check that it's not a defined namespace
9476 */
9477 for (j = 1;j <= nbNs;j++)
9478 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9479 break;
9480 if (j <= nbNs) continue;
9481
9482 nsname = xmlGetNamespace(ctxt, attname);
9483 if (nsname != defaults->values[2]) {
9484 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009485 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009486 nbNs++;
9487 }
9488 } else {
9489 /*
9490 * check that it's not a defined attribute
9491 */
9492 for (j = 0;j < nbatts;j+=5) {
9493 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9494 break;
9495 }
9496 if (j < nbatts) continue;
9497
9498 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9499 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009500 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009501 }
9502 maxatts = ctxt->maxatts;
9503 atts = ctxt->atts;
9504 }
9505 atts[nbatts++] = attname;
9506 atts[nbatts++] = aprefix;
9507 if (aprefix == NULL)
9508 atts[nbatts++] = NULL;
9509 else
9510 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009511 atts[nbatts++] = defaults->values[5 * i + 2];
9512 atts[nbatts++] = defaults->values[5 * i + 3];
9513 if ((ctxt->standalone == 1) &&
9514 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009515 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009516 "standalone: attribute %s on %s defaulted from external subset\n",
9517 attname, localname);
9518 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009519 nbdef++;
9520 }
9521 }
9522 }
9523 }
9524
Daniel Veillarde70c8772003-11-25 07:21:18 +00009525 /*
9526 * The attributes checkings
9527 */
9528 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009529 /*
9530 * The default namespace does not apply to attribute names.
9531 */
9532 if (atts[i + 1] != NULL) {
9533 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9534 if (nsname == NULL) {
9535 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9536 "Namespace prefix %s for %s on %s is not defined\n",
9537 atts[i + 1], atts[i], localname);
9538 }
9539 atts[i + 2] = nsname;
9540 } else
9541 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009542 /*
9543 * [ WFC: Unique Att Spec ]
9544 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009545 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009546 * As extended by the Namespace in XML REC.
9547 */
9548 for (j = 0; j < i;j += 5) {
9549 if (atts[i] == atts[j]) {
9550 if (atts[i+1] == atts[j+1]) {
9551 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9552 break;
9553 }
9554 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9555 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9556 "Namespaced Attribute %s in '%s' redefined\n",
9557 atts[i], nsname, NULL);
9558 break;
9559 }
9560 }
9561 }
9562 }
9563
Daniel Veillarde57ec792003-09-10 10:50:59 +00009564 nsname = xmlGetNamespace(ctxt, prefix);
9565 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009566 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9567 "Namespace prefix %s on %s is not defined\n",
9568 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009569 }
9570 *pref = prefix;
9571 *URI = nsname;
9572
9573 /*
9574 * SAX: Start of Element !
9575 */
9576 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9577 (!ctxt->disableSAX)) {
9578 if (nbNs > 0)
9579 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9580 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9581 nbatts / 5, nbdef, atts);
9582 else
9583 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9584 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9585 }
9586
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009587done:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009588 /*
9589 * Free up attribute allocated strings if needed
9590 */
9591 if (attval != 0) {
9592 for (i = 3,j = 0; j < nratts;i += 5,j++)
9593 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9594 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009595 }
9596
9597 return(localname);
9598}
9599
9600/**
9601 * xmlParseEndTag2:
9602 * @ctxt: an XML parser context
9603 * @line: line of the start tag
9604 * @nsNr: number of namespaces on the start tag
9605 *
9606 * parse an end of tag
9607 *
9608 * [42] ETag ::= '</' Name S? '>'
9609 *
9610 * With namespace
9611 *
9612 * [NS 9] ETag ::= '</' QName S? '>'
9613 */
9614
9615static void
9616xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009617 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009618 const xmlChar *name;
David Kilzerdb07dd62016-02-12 09:58:29 -08009619 size_t curLength;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009620
9621 GROW;
9622 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009623 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009624 return;
9625 }
9626 SKIP(2);
9627
David Kilzerdb07dd62016-02-12 09:58:29 -08009628 curLength = ctxt->input->end - ctxt->input->cur;
9629 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9630 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9631 if ((curLength >= (size_t)(tlen + 1)) &&
9632 (ctxt->input->cur[tlen] == '>')) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009633 ctxt->input->cur += tlen + 1;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009634 ctxt->input->col += tlen + 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009635 goto done;
9636 }
9637 ctxt->input->cur += tlen;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009638 ctxt->input->col += tlen;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009639 name = (xmlChar*)1;
9640 } else {
9641 if (prefix == NULL)
9642 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9643 else
9644 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9645 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009646
9647 /*
9648 * We should definitely be at the ending "S? '>'" part
9649 */
9650 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009651 if (ctxt->instate == XML_PARSER_EOF)
9652 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009653 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009654 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009655 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009656 } else
9657 NEXT1;
9658
9659 /*
9660 * [ WFC: Element Type Match ]
9661 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009662 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009663 *
9664 */
9665 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009666 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009667 if ((line == 0) && (ctxt->node != NULL))
9668 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009669 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009670 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009671 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009672 }
9673
9674 /*
9675 * SAX: End of Tag
9676 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009677done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009678 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9679 (!ctxt->disableSAX))
9680 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9681
Daniel Veillard0fb18932003-09-07 09:14:37 +00009682 spacePop(ctxt);
9683 if (nsNr != 0)
9684 nsPop(ctxt, nsNr);
9685 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009686}
9687
9688/**
Owen Taylor3473f882001-02-23 17:55:21 +00009689 * xmlParseCDSect:
9690 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009691 *
Owen Taylor3473f882001-02-23 17:55:21 +00009692 * Parse escaped pure raw content.
9693 *
9694 * [18] CDSect ::= CDStart CData CDEnd
9695 *
9696 * [19] CDStart ::= '<![CDATA['
9697 *
9698 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9699 *
9700 * [21] CDEnd ::= ']]>'
9701 */
9702void
9703xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9704 xmlChar *buf = NULL;
9705 int len = 0;
9706 int size = XML_PARSER_BUFFER_SIZE;
9707 int r, rl;
9708 int s, sl;
9709 int cur, l;
9710 int count = 0;
9711
Daniel Veillard8f597c32003-10-06 08:19:27 +00009712 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009713 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009714 SKIP(9);
9715 } else
9716 return;
9717
9718 ctxt->instate = XML_PARSER_CDATA_SECTION;
9719 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009720 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009721 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009722 ctxt->instate = XML_PARSER_CONTENT;
9723 return;
9724 }
9725 NEXTL(rl);
9726 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009727 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009728 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009729 ctxt->instate = XML_PARSER_CONTENT;
9730 return;
9731 }
9732 NEXTL(sl);
9733 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009734 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009735 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009736 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009737 return;
9738 }
William M. Brack871611b2003-10-18 04:53:14 +00009739 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009740 ((r != ']') || (s != ']') || (cur != '>'))) {
9741 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009742 xmlChar *tmp;
9743
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009744 if ((size > XML_MAX_TEXT_LENGTH) &&
9745 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9746 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9747 "CData section too big found", NULL);
9748 xmlFree (buf);
9749 return;
9750 }
9751 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009752 if (tmp == NULL) {
9753 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009754 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009755 return;
9756 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009757 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009758 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009759 }
9760 COPY_BUF(rl,buf,len,r);
9761 r = s;
9762 rl = sl;
9763 s = cur;
9764 sl = l;
9765 count++;
9766 if (count > 50) {
9767 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009768 if (ctxt->instate == XML_PARSER_EOF) {
9769 xmlFree(buf);
9770 return;
9771 }
Owen Taylor3473f882001-02-23 17:55:21 +00009772 count = 0;
9773 }
9774 NEXTL(l);
9775 cur = CUR_CHAR(l);
9776 }
9777 buf[len] = 0;
9778 ctxt->instate = XML_PARSER_CONTENT;
9779 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009780 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009781 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009782 xmlFree(buf);
9783 return;
9784 }
9785 NEXTL(l);
9786
9787 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009788 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009789 */
9790 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9791 if (ctxt->sax->cdataBlock != NULL)
9792 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009793 else if (ctxt->sax->characters != NULL)
9794 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009795 }
9796 xmlFree(buf);
9797}
9798
9799/**
9800 * xmlParseContent:
9801 * @ctxt: an XML parser context
9802 *
9803 * Parse a content:
9804 *
9805 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9806 */
9807
9808void
9809xmlParseContent(xmlParserCtxtPtr ctxt) {
9810 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009811 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009812 ((RAW != '<') || (NXT(1) != '/')) &&
9813 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009814 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009815 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009816 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009817
9818 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009819 * First case : a Processing Instruction.
9820 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009821 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009822 xmlParsePI(ctxt);
9823 }
9824
9825 /*
9826 * Second case : a CDSection
9827 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009828 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009829 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009830 xmlParseCDSect(ctxt);
9831 }
9832
9833 /*
9834 * Third case : a comment
9835 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009836 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009837 (NXT(2) == '-') && (NXT(3) == '-')) {
9838 xmlParseComment(ctxt);
9839 ctxt->instate = XML_PARSER_CONTENT;
9840 }
9841
9842 /*
9843 * Fourth case : a sub-element.
9844 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009845 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009846 xmlParseElement(ctxt);
9847 }
9848
9849 /*
9850 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009851 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009852 */
9853
Daniel Veillard21a0f912001-02-25 19:54:14 +00009854 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009855 xmlParseReference(ctxt);
9856 }
9857
9858 /*
9859 * Last case, text. Note that References are handled directly.
9860 */
9861 else {
9862 xmlParseCharData(ctxt, 0);
9863 }
9864
9865 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00009866 SHRINK;
9867
Daniel Veillardfdc91562002-07-01 21:52:03 +00009868 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009869 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9870 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +08009871 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009872 break;
9873 }
9874 }
9875}
9876
9877/**
9878 * xmlParseElement:
9879 * @ctxt: an XML parser context
9880 *
9881 * parse an XML element, this is highly recursive
9882 *
9883 * [39] element ::= EmptyElemTag | STag content ETag
9884 *
9885 * [ WFC: Element Type Match ]
9886 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009887 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009888 *
Owen Taylor3473f882001-02-23 17:55:21 +00009889 */
9890
9891void
9892xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009893 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009894 const xmlChar *prefix = NULL;
9895 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009896 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009897 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009898 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009899 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009900
Daniel Veillard8915c152008-08-26 13:05:34 +00009901 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9902 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9903 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9904 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9905 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08009906 xmlHaltParser(ctxt);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009907 return;
9908 }
9909
Owen Taylor3473f882001-02-23 17:55:21 +00009910 /* Capture start position */
9911 if (ctxt->record_info) {
9912 node_info.begin_pos = ctxt->input->consumed +
9913 (CUR_PTR - ctxt->input->base);
9914 node_info.begin_line = ctxt->input->line;
9915 }
9916
9917 if (ctxt->spaceNr == 0)
9918 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009919 else if (*ctxt->space == -2)
9920 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009921 else
9922 spacePush(ctxt, *ctxt->space);
9923
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009924 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009925#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009926 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009927#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009928 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009929#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009930 else
9931 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009932#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009933 if (ctxt->instate == XML_PARSER_EOF)
9934 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009935 if (name == NULL) {
9936 spacePop(ctxt);
9937 return;
9938 }
9939 namePush(ctxt, name);
9940 ret = ctxt->node;
9941
Daniel Veillard4432df22003-09-28 18:58:27 +00009942#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009943 /*
9944 * [ VC: Root Element Type ]
9945 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009946 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00009947 */
9948 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9949 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9950 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009951#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009952
9953 /*
9954 * Check for an Empty Element.
9955 */
9956 if ((RAW == '/') && (NXT(1) == '>')) {
9957 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009958 if (ctxt->sax2) {
9959 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9960 (!ctxt->disableSAX))
9961 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009962#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009963 } else {
9964 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9965 (!ctxt->disableSAX))
9966 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009967#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009968 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009969 namePop(ctxt);
9970 spacePop(ctxt);
9971 if (nsNr != ctxt->nsNr)
9972 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009973 if ( ret != NULL && ctxt->record_info ) {
9974 node_info.end_pos = ctxt->input->consumed +
9975 (CUR_PTR - ctxt->input->base);
9976 node_info.end_line = ctxt->input->line;
9977 node_info.node = ret;
9978 xmlParserAddNodeInfo(ctxt, &node_info);
9979 }
9980 return;
9981 }
9982 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009983 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009984 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009985 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9986 "Couldn't find end of Start Tag %s line %d\n",
9987 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009988
9989 /*
9990 * end of parsing of this node.
9991 */
9992 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009993 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009994 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009995 if (nsNr != ctxt->nsNr)
9996 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009997
9998 /*
9999 * Capture end position and add node
10000 */
10001 if ( ret != NULL && ctxt->record_info ) {
10002 node_info.end_pos = ctxt->input->consumed +
10003 (CUR_PTR - ctxt->input->base);
10004 node_info.end_line = ctxt->input->line;
10005 node_info.node = ret;
10006 xmlParserAddNodeInfo(ctxt, &node_info);
10007 }
10008 return;
10009 }
10010
10011 /*
10012 * Parse the content of the element:
10013 */
10014 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010015 if (ctxt->instate == XML_PARSER_EOF)
10016 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010017 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010018 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010019 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010020 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010021
10022 /*
10023 * end of parsing of this node.
10024 */
10025 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010026 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010027 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010028 if (nsNr != ctxt->nsNr)
10029 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010030 return;
10031 }
10032
10033 /*
10034 * parse the end of tag: '</' should be here.
10035 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010036 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010037 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010038 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010039 }
10040#ifdef LIBXML_SAX1_ENABLED
10041 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010042 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010043#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010044
10045 /*
10046 * Capture end position and add node
10047 */
10048 if ( ret != NULL && ctxt->record_info ) {
10049 node_info.end_pos = ctxt->input->consumed +
10050 (CUR_PTR - ctxt->input->base);
10051 node_info.end_line = ctxt->input->line;
10052 node_info.node = ret;
10053 xmlParserAddNodeInfo(ctxt, &node_info);
10054 }
10055}
10056
10057/**
10058 * xmlParseVersionNum:
10059 * @ctxt: an XML parser context
10060 *
10061 * parse the XML version value.
10062 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010063 * [26] VersionNum ::= '1.' [0-9]+
10064 *
10065 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010066 *
10067 * Returns the string giving the XML version number, or NULL
10068 */
10069xmlChar *
10070xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10071 xmlChar *buf = NULL;
10072 int len = 0;
10073 int size = 10;
10074 xmlChar cur;
10075
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010076 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010077 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010078 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010079 return(NULL);
10080 }
10081 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010082 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010083 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010084 return(NULL);
10085 }
10086 buf[len++] = cur;
10087 NEXT;
10088 cur=CUR;
10089 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010090 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010091 return(NULL);
10092 }
10093 buf[len++] = cur;
10094 NEXT;
10095 cur=CUR;
10096 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010097 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010098 xmlChar *tmp;
10099
Owen Taylor3473f882001-02-23 17:55:21 +000010100 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010101 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10102 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010103 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010104 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010105 return(NULL);
10106 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010107 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010108 }
10109 buf[len++] = cur;
10110 NEXT;
10111 cur=CUR;
10112 }
10113 buf[len] = 0;
10114 return(buf);
10115}
10116
10117/**
10118 * xmlParseVersionInfo:
10119 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010120 *
Owen Taylor3473f882001-02-23 17:55:21 +000010121 * parse the XML version.
10122 *
10123 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010124 *
Owen Taylor3473f882001-02-23 17:55:21 +000010125 * [25] Eq ::= S? '=' S?
10126 *
10127 * Returns the version string, e.g. "1.0"
10128 */
10129
10130xmlChar *
10131xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10132 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010133
Daniel Veillarda07050d2003-10-19 14:46:32 +000010134 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010135 SKIP(7);
10136 SKIP_BLANKS;
10137 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010138 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010139 return(NULL);
10140 }
10141 NEXT;
10142 SKIP_BLANKS;
10143 if (RAW == '"') {
10144 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010145 version = xmlParseVersionNum(ctxt);
10146 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010147 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010148 } else
10149 NEXT;
10150 } else if (RAW == '\''){
10151 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010152 version = xmlParseVersionNum(ctxt);
10153 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010154 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010155 } else
10156 NEXT;
10157 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010158 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010159 }
10160 }
10161 return(version);
10162}
10163
10164/**
10165 * xmlParseEncName:
10166 * @ctxt: an XML parser context
10167 *
10168 * parse the XML encoding name
10169 *
10170 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10171 *
10172 * Returns the encoding name value or NULL
10173 */
10174xmlChar *
10175xmlParseEncName(xmlParserCtxtPtr ctxt) {
10176 xmlChar *buf = NULL;
10177 int len = 0;
10178 int size = 10;
10179 xmlChar cur;
10180
10181 cur = CUR;
10182 if (((cur >= 'a') && (cur <= 'z')) ||
10183 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010184 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010185 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010186 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010187 return(NULL);
10188 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010189
Owen Taylor3473f882001-02-23 17:55:21 +000010190 buf[len++] = cur;
10191 NEXT;
10192 cur = CUR;
10193 while (((cur >= 'a') && (cur <= 'z')) ||
10194 ((cur >= 'A') && (cur <= 'Z')) ||
10195 ((cur >= '0') && (cur <= '9')) ||
10196 (cur == '.') || (cur == '_') ||
10197 (cur == '-')) {
10198 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010199 xmlChar *tmp;
10200
Owen Taylor3473f882001-02-23 17:55:21 +000010201 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010202 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10203 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010204 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010205 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010206 return(NULL);
10207 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010208 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010209 }
10210 buf[len++] = cur;
10211 NEXT;
10212 cur = CUR;
10213 if (cur == 0) {
10214 SHRINK;
10215 GROW;
10216 cur = CUR;
10217 }
10218 }
10219 buf[len] = 0;
10220 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010221 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010222 }
10223 return(buf);
10224}
10225
10226/**
10227 * xmlParseEncodingDecl:
10228 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010229 *
Owen Taylor3473f882001-02-23 17:55:21 +000010230 * parse the XML encoding declaration
10231 *
10232 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10233 *
10234 * this setups the conversion filters.
10235 *
10236 * Returns the encoding value or NULL
10237 */
10238
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010239const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010240xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10241 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010242
10243 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010244 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010245 SKIP(8);
10246 SKIP_BLANKS;
10247 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010248 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010249 return(NULL);
10250 }
10251 NEXT;
10252 SKIP_BLANKS;
10253 if (RAW == '"') {
10254 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010255 encoding = xmlParseEncName(ctxt);
10256 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010257 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010258 xmlFree((xmlChar *) encoding);
10259 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010260 } else
10261 NEXT;
10262 } else if (RAW == '\''){
10263 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010264 encoding = xmlParseEncName(ctxt);
10265 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010266 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010267 xmlFree((xmlChar *) encoding);
10268 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010269 } else
10270 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010271 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010272 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010273 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010274
10275 /*
10276 * Non standard parsing, allowing the user to ignore encoding
10277 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010278 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10279 xmlFree((xmlChar *) encoding);
10280 return(NULL);
10281 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010282
Daniel Veillard6b621b82003-08-11 15:03:34 +000010283 /*
10284 * UTF-16 encoding stwich has already taken place at this stage,
10285 * more over the little-endian/big-endian selection is already done
10286 */
10287 if ((encoding != NULL) &&
10288 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10289 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010290 /*
10291 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010292 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010293 * document is apparently UTF-8 compatible, then raise an
10294 * encoding mismatch fatal error
10295 */
10296 if ((ctxt->encoding == NULL) &&
10297 (ctxt->input->buf != NULL) &&
10298 (ctxt->input->buf->encoder == NULL)) {
10299 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10300 "Document labelled UTF-16 but has UTF-8 content\n");
10301 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010302 if (ctxt->encoding != NULL)
10303 xmlFree((xmlChar *) ctxt->encoding);
10304 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010305 }
10306 /*
10307 * UTF-8 encoding is handled natively
10308 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010309 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010310 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10311 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010312 if (ctxt->encoding != NULL)
10313 xmlFree((xmlChar *) ctxt->encoding);
10314 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010315 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010316 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010317 xmlCharEncodingHandlerPtr handler;
10318
10319 if (ctxt->input->encoding != NULL)
10320 xmlFree((xmlChar *) ctxt->input->encoding);
10321 ctxt->input->encoding = encoding;
10322
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010323 handler = xmlFindCharEncodingHandler((const char *) encoding);
10324 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010325 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10326 /* failed to convert */
10327 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10328 return(NULL);
10329 }
Owen Taylor3473f882001-02-23 17:55:21 +000010330 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010331 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010332 "Unsupported encoding %s\n", encoding);
10333 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010334 }
10335 }
10336 }
10337 return(encoding);
10338}
10339
10340/**
10341 * xmlParseSDDecl:
10342 * @ctxt: an XML parser context
10343 *
10344 * parse the XML standalone declaration
10345 *
10346 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010347 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010348 *
10349 * [ VC: Standalone Document Declaration ]
10350 * TODO The standalone document declaration must have the value "no"
10351 * if any external markup declarations contain declarations of:
10352 * - attributes with default values, if elements to which these
10353 * attributes apply appear in the document without specifications
10354 * of values for these attributes, or
10355 * - entities (other than amp, lt, gt, apos, quot), if references
10356 * to those entities appear in the document, or
10357 * - attributes with values subject to normalization, where the
10358 * attribute appears in the document with a value which will change
10359 * as a result of normalization, or
10360 * - element types with element content, if white space occurs directly
10361 * within any instance of those types.
10362 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010363 * Returns:
10364 * 1 if standalone="yes"
10365 * 0 if standalone="no"
10366 * -2 if standalone attribute is missing or invalid
10367 * (A standalone value of -2 means that the XML declaration was found,
10368 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010369 */
10370
10371int
10372xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010373 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010374
10375 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010376 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010377 SKIP(10);
10378 SKIP_BLANKS;
10379 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010380 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010381 return(standalone);
10382 }
10383 NEXT;
10384 SKIP_BLANKS;
10385 if (RAW == '\''){
10386 NEXT;
10387 if ((RAW == 'n') && (NXT(1) == 'o')) {
10388 standalone = 0;
10389 SKIP(2);
10390 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10391 (NXT(2) == 's')) {
10392 standalone = 1;
10393 SKIP(3);
10394 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010395 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010396 }
10397 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010398 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010399 } else
10400 NEXT;
10401 } else if (RAW == '"'){
10402 NEXT;
10403 if ((RAW == 'n') && (NXT(1) == 'o')) {
10404 standalone = 0;
10405 SKIP(2);
10406 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10407 (NXT(2) == 's')) {
10408 standalone = 1;
10409 SKIP(3);
10410 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010411 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010412 }
10413 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010414 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010415 } else
10416 NEXT;
10417 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010418 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010419 }
10420 }
10421 return(standalone);
10422}
10423
10424/**
10425 * xmlParseXMLDecl:
10426 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010427 *
Owen Taylor3473f882001-02-23 17:55:21 +000010428 * parse an XML declaration header
10429 *
10430 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10431 */
10432
10433void
10434xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10435 xmlChar *version;
10436
10437 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010438 * This value for standalone indicates that the document has an
10439 * XML declaration but it does not have a standalone attribute.
10440 * It will be overwritten later if a standalone attribute is found.
10441 */
10442 ctxt->input->standalone = -2;
10443
10444 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010445 * We know that '<?xml' is here.
10446 */
10447 SKIP(5);
10448
William M. Brack76e95df2003-10-18 16:20:14 +000010449 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010450 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10451 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010452 }
10453 SKIP_BLANKS;
10454
10455 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010456 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010457 */
10458 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010459 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010460 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010461 } else {
10462 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10463 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010464 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010465 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010466 if (ctxt->options & XML_PARSE_OLD10) {
10467 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10468 "Unsupported version '%s'\n",
10469 version);
10470 } else {
10471 if ((version[0] == '1') && ((version[1] == '.'))) {
10472 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10473 "Unsupported version '%s'\n",
10474 version, NULL);
10475 } else {
10476 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10477 "Unsupported version '%s'\n",
10478 version);
10479 }
10480 }
Daniel Veillard19840942001-11-29 16:11:38 +000010481 }
10482 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010483 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010484 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010485 }
Owen Taylor3473f882001-02-23 17:55:21 +000010486
10487 /*
10488 * We may have the encoding declaration
10489 */
William M. Brack76e95df2003-10-18 16:20:14 +000010490 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010491 if ((RAW == '?') && (NXT(1) == '>')) {
10492 SKIP(2);
10493 return;
10494 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010495 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010496 }
10497 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010498 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10499 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010500 /*
10501 * The XML REC instructs us to stop parsing right here
10502 */
10503 return;
10504 }
10505
10506 /*
10507 * We may have the standalone status.
10508 */
William M. Brack76e95df2003-10-18 16:20:14 +000010509 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010510 if ((RAW == '?') && (NXT(1) == '>')) {
10511 SKIP(2);
10512 return;
10513 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010514 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010515 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010516
10517 /*
10518 * We can grow the input buffer freely at that point
10519 */
10520 GROW;
10521
Owen Taylor3473f882001-02-23 17:55:21 +000010522 SKIP_BLANKS;
10523 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10524
10525 SKIP_BLANKS;
10526 if ((RAW == '?') && (NXT(1) == '>')) {
10527 SKIP(2);
10528 } else if (RAW == '>') {
10529 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010530 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010531 NEXT;
10532 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010533 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010534 MOVETO_ENDTAG(CUR_PTR);
10535 NEXT;
10536 }
10537}
10538
10539/**
10540 * xmlParseMisc:
10541 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010542 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010543 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010544 *
10545 * [27] Misc ::= Comment | PI | S
10546 */
10547
10548void
10549xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010550 while ((ctxt->instate != XML_PARSER_EOF) &&
10551 (((RAW == '<') && (NXT(1) == '?')) ||
10552 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10553 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010554 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010555 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010556 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010557 NEXT;
10558 } else
10559 xmlParseComment(ctxt);
10560 }
10561}
10562
10563/**
10564 * xmlParseDocument:
10565 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010566 *
Owen Taylor3473f882001-02-23 17:55:21 +000010567 * parse an XML document (and build a tree if using the standard SAX
10568 * interface).
10569 *
10570 * [1] document ::= prolog element Misc*
10571 *
10572 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10573 *
10574 * Returns 0, -1 in case of error. the parser context is augmented
10575 * as a result of the parsing.
10576 */
10577
10578int
10579xmlParseDocument(xmlParserCtxtPtr ctxt) {
10580 xmlChar start[4];
10581 xmlCharEncoding enc;
10582
10583 xmlInitParser();
10584
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010585 if ((ctxt == NULL) || (ctxt->input == NULL))
10586 return(-1);
10587
Owen Taylor3473f882001-02-23 17:55:21 +000010588 GROW;
10589
10590 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010591 * SAX: detecting the level.
10592 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010593 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010594
10595 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010596 * SAX: beginning of the document processing.
10597 */
10598 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10599 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010600 if (ctxt->instate == XML_PARSER_EOF)
10601 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010602
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010603 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010604 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010605 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010606 * Get the 4 first bytes and decode the charset
10607 * if enc != XML_CHAR_ENCODING_NONE
10608 * plug some encoding conversion routines.
10609 */
10610 start[0] = RAW;
10611 start[1] = NXT(1);
10612 start[2] = NXT(2);
10613 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010614 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010615 if (enc != XML_CHAR_ENCODING_NONE) {
10616 xmlSwitchEncoding(ctxt, enc);
10617 }
Owen Taylor3473f882001-02-23 17:55:21 +000010618 }
10619
10620
10621 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010622 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010623 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010624 }
10625
10626 /*
10627 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010628 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010629 * than just the first line, unless the amount of data is really
10630 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010631 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010632 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10633 GROW;
10634 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010635 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010636
10637 /*
10638 * Note that we will switch encoding on the fly.
10639 */
10640 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010641 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10642 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010643 /*
10644 * The XML REC instructs us to stop parsing right here
10645 */
10646 return(-1);
10647 }
10648 ctxt->standalone = ctxt->input->standalone;
10649 SKIP_BLANKS;
10650 } else {
10651 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10652 }
10653 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10654 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010655 if (ctxt->instate == XML_PARSER_EOF)
10656 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010657 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10658 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10659 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10660 }
Owen Taylor3473f882001-02-23 17:55:21 +000010661
10662 /*
10663 * The Misc part of the Prolog
10664 */
10665 GROW;
10666 xmlParseMisc(ctxt);
10667
10668 /*
10669 * Then possibly doc type declaration(s) and more Misc
10670 * (doctypedecl Misc*)?
10671 */
10672 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010673 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010674
10675 ctxt->inSubset = 1;
10676 xmlParseDocTypeDecl(ctxt);
10677 if (RAW == '[') {
10678 ctxt->instate = XML_PARSER_DTD;
10679 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010680 if (ctxt->instate == XML_PARSER_EOF)
10681 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010682 }
10683
10684 /*
10685 * Create and update the external subset.
10686 */
10687 ctxt->inSubset = 2;
10688 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10689 (!ctxt->disableSAX))
10690 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10691 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010692 if (ctxt->instate == XML_PARSER_EOF)
10693 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010694 ctxt->inSubset = 0;
10695
Daniel Veillardac4118d2008-01-11 05:27:32 +000010696 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010697
10698 ctxt->instate = XML_PARSER_PROLOG;
10699 xmlParseMisc(ctxt);
10700 }
10701
10702 /*
10703 * Time to start parsing the tree itself
10704 */
10705 GROW;
10706 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010707 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10708 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010709 } else {
10710 ctxt->instate = XML_PARSER_CONTENT;
10711 xmlParseElement(ctxt);
10712 ctxt->instate = XML_PARSER_EPILOG;
10713
10714
10715 /*
10716 * The Misc part at the end
10717 */
10718 xmlParseMisc(ctxt);
10719
Daniel Veillard561b7f82002-03-20 21:55:57 +000010720 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010721 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010722 }
10723 ctxt->instate = XML_PARSER_EOF;
10724 }
10725
10726 /*
10727 * SAX: end of the document processing.
10728 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010729 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010730 ctxt->sax->endDocument(ctxt->userData);
10731
Daniel Veillard5997aca2002-03-18 18:36:20 +000010732 /*
10733 * Remove locally kept entity definitions if the tree was not built
10734 */
10735 if ((ctxt->myDoc != NULL) &&
10736 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10737 xmlFreeDoc(ctxt->myDoc);
10738 ctxt->myDoc = NULL;
10739 }
10740
Daniel Veillardae0765b2008-07-31 19:54:59 +000010741 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10742 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10743 if (ctxt->valid)
10744 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10745 if (ctxt->nsWellFormed)
10746 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10747 if (ctxt->options & XML_PARSE_OLD10)
10748 ctxt->myDoc->properties |= XML_DOC_OLD10;
10749 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010750 if (! ctxt->wellFormed) {
10751 ctxt->valid = 0;
10752 return(-1);
10753 }
Owen Taylor3473f882001-02-23 17:55:21 +000010754 return(0);
10755}
10756
10757/**
10758 * xmlParseExtParsedEnt:
10759 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010760 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010761 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010762 * An external general parsed entity is well-formed if it matches the
10763 * production labeled extParsedEnt.
10764 *
10765 * [78] extParsedEnt ::= TextDecl? content
10766 *
10767 * Returns 0, -1 in case of error. the parser context is augmented
10768 * as a result of the parsing.
10769 */
10770
10771int
10772xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10773 xmlChar start[4];
10774 xmlCharEncoding enc;
10775
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010776 if ((ctxt == NULL) || (ctxt->input == NULL))
10777 return(-1);
10778
Owen Taylor3473f882001-02-23 17:55:21 +000010779 xmlDefaultSAXHandlerInit();
10780
Daniel Veillard309f81d2003-09-23 09:02:53 +000010781 xmlDetectSAX2(ctxt);
10782
Owen Taylor3473f882001-02-23 17:55:21 +000010783 GROW;
10784
10785 /*
10786 * SAX: beginning of the document processing.
10787 */
10788 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10789 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10790
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010791 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010792 * Get the 4 first bytes and decode the charset
10793 * if enc != XML_CHAR_ENCODING_NONE
10794 * plug some encoding conversion routines.
10795 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010796 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10797 start[0] = RAW;
10798 start[1] = NXT(1);
10799 start[2] = NXT(2);
10800 start[3] = NXT(3);
10801 enc = xmlDetectCharEncoding(start, 4);
10802 if (enc != XML_CHAR_ENCODING_NONE) {
10803 xmlSwitchEncoding(ctxt, enc);
10804 }
Owen Taylor3473f882001-02-23 17:55:21 +000010805 }
10806
10807
10808 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010809 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010810 }
10811
10812 /*
10813 * Check for the XMLDecl in the Prolog.
10814 */
10815 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010816 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010817
10818 /*
10819 * Note that we will switch encoding on the fly.
10820 */
10821 xmlParseXMLDecl(ctxt);
10822 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10823 /*
10824 * The XML REC instructs us to stop parsing right here
10825 */
10826 return(-1);
10827 }
10828 SKIP_BLANKS;
10829 } else {
10830 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10831 }
10832 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10833 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010834 if (ctxt->instate == XML_PARSER_EOF)
10835 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010836
10837 /*
10838 * Doing validity checking on chunk doesn't make sense
10839 */
10840 ctxt->instate = XML_PARSER_CONTENT;
10841 ctxt->validate = 0;
10842 ctxt->loadsubset = 0;
10843 ctxt->depth = 0;
10844
10845 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010846 if (ctxt->instate == XML_PARSER_EOF)
10847 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010848
Owen Taylor3473f882001-02-23 17:55:21 +000010849 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010850 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010851 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010852 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010853 }
10854
10855 /*
10856 * SAX: end of the document processing.
10857 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010858 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010859 ctxt->sax->endDocument(ctxt->userData);
10860
10861 if (! ctxt->wellFormed) return(-1);
10862 return(0);
10863}
10864
Daniel Veillard73b013f2003-09-30 12:36:01 +000010865#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010866/************************************************************************
10867 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010868 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010869 * *
10870 ************************************************************************/
10871
10872/**
10873 * xmlParseLookupSequence:
10874 * @ctxt: an XML parser context
10875 * @first: the first char to lookup
10876 * @next: the next char to lookup or zero
10877 * @third: the next char to lookup or zero
10878 *
10879 * Try to find if a sequence (first, next, third) or just (first next) or
10880 * (first) is available in the input stream.
10881 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10882 * to avoid rescanning sequences of bytes, it DOES change the state of the
10883 * parser, do not use liberally.
10884 *
10885 * Returns the index to the current parsing point if the full sequence
10886 * is available, -1 otherwise.
10887 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010888static int
Owen Taylor3473f882001-02-23 17:55:21 +000010889xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10890 xmlChar next, xmlChar third) {
10891 int base, len;
10892 xmlParserInputPtr in;
10893 const xmlChar *buf;
10894
10895 in = ctxt->input;
10896 if (in == NULL) return(-1);
10897 base = in->cur - in->base;
10898 if (base < 0) return(-1);
10899 if (ctxt->checkIndex > base)
10900 base = ctxt->checkIndex;
10901 if (in->buf == NULL) {
10902 buf = in->base;
10903 len = in->length;
10904 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010905 buf = xmlBufContent(in->buf->buffer);
10906 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010907 }
10908 /* take into account the sequence length */
10909 if (third) len -= 2;
10910 else if (next) len --;
10911 for (;base < len;base++) {
10912 if (buf[base] == first) {
10913 if (third != 0) {
10914 if ((buf[base + 1] != next) ||
10915 (buf[base + 2] != third)) continue;
10916 } else if (next != 0) {
10917 if (buf[base + 1] != next) continue;
10918 }
10919 ctxt->checkIndex = 0;
10920#ifdef DEBUG_PUSH
10921 if (next == 0)
10922 xmlGenericError(xmlGenericErrorContext,
10923 "PP: lookup '%c' found at %d\n",
10924 first, base);
10925 else if (third == 0)
10926 xmlGenericError(xmlGenericErrorContext,
10927 "PP: lookup '%c%c' found at %d\n",
10928 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010929 else
Owen Taylor3473f882001-02-23 17:55:21 +000010930 xmlGenericError(xmlGenericErrorContext,
10931 "PP: lookup '%c%c%c' found at %d\n",
10932 first, next, third, base);
10933#endif
10934 return(base - (in->cur - in->base));
10935 }
10936 }
10937 ctxt->checkIndex = base;
10938#ifdef DEBUG_PUSH
10939 if (next == 0)
10940 xmlGenericError(xmlGenericErrorContext,
10941 "PP: lookup '%c' failed\n", first);
10942 else if (third == 0)
10943 xmlGenericError(xmlGenericErrorContext,
10944 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010945 else
Owen Taylor3473f882001-02-23 17:55:21 +000010946 xmlGenericError(xmlGenericErrorContext,
10947 "PP: lookup '%c%c%c' failed\n", first, next, third);
10948#endif
10949 return(-1);
10950}
10951
10952/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010953 * xmlParseGetLasts:
10954 * @ctxt: an XML parser context
10955 * @lastlt: pointer to store the last '<' from the input
10956 * @lastgt: pointer to store the last '>' from the input
10957 *
10958 * Lookup the last < and > in the current chunk
10959 */
10960static void
10961xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10962 const xmlChar **lastgt) {
10963 const xmlChar *tmp;
10964
10965 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10966 xmlGenericError(xmlGenericErrorContext,
10967 "Internal error: xmlParseGetLasts\n");
10968 return;
10969 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010970 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010971 tmp = ctxt->input->end;
10972 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010973 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010974 if (tmp < ctxt->input->base) {
10975 *lastlt = NULL;
10976 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010977 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010978 *lastlt = tmp;
10979 tmp++;
10980 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10981 if (*tmp == '\'') {
10982 tmp++;
10983 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10984 if (tmp < ctxt->input->end) tmp++;
10985 } else if (*tmp == '"') {
10986 tmp++;
10987 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10988 if (tmp < ctxt->input->end) tmp++;
10989 } else
10990 tmp++;
10991 }
10992 if (tmp < ctxt->input->end)
10993 *lastgt = tmp;
10994 else {
10995 tmp = *lastlt;
10996 tmp--;
10997 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10998 if (tmp >= ctxt->input->base)
10999 *lastgt = tmp;
11000 else
11001 *lastgt = NULL;
11002 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011003 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011004 } else {
11005 *lastlt = NULL;
11006 *lastgt = NULL;
11007 }
11008}
11009/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011010 * xmlCheckCdataPush:
David Kilzer4f8606c2016-01-05 13:38:09 -080011011 * @cur: pointer to the block of characters
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011012 * @len: length of the block in bytes
David Kilzer4f8606c2016-01-05 13:38:09 -080011013 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011014 *
11015 * Check that the block of characters is okay as SCdata content [20]
11016 *
11017 * Returns the number of bytes to pass if okay, a negative index where an
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +020011018 * UTF-8 error occurred otherwise
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011019 */
11020static int
David Kilzer4f8606c2016-01-05 13:38:09 -080011021xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011022 int ix;
11023 unsigned char c;
11024 int codepoint;
11025
11026 if ((utf == NULL) || (len <= 0))
11027 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011028
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011029 for (ix = 0; ix < len;) { /* string is 0-terminated */
11030 c = utf[ix];
11031 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11032 if (c >= 0x20)
11033 ix++;
11034 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11035 ix++;
11036 else
11037 return(-ix);
11038 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011039 if (ix + 2 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011040 if ((utf[ix+1] & 0xc0 ) != 0x80)
11041 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011042 codepoint = (utf[ix] & 0x1f) << 6;
11043 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011044 if (!xmlIsCharQ(codepoint))
11045 return(-ix);
11046 ix += 2;
11047 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011048 if (ix + 3 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011049 if (((utf[ix+1] & 0xc0) != 0x80) ||
11050 ((utf[ix+2] & 0xc0) != 0x80))
11051 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011052 codepoint = (utf[ix] & 0xf) << 12;
11053 codepoint |= (utf[ix+1] & 0x3f) << 6;
11054 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011055 if (!xmlIsCharQ(codepoint))
11056 return(-ix);
11057 ix += 3;
11058 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011059 if (ix + 4 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011060 if (((utf[ix+1] & 0xc0) != 0x80) ||
11061 ((utf[ix+2] & 0xc0) != 0x80) ||
11062 ((utf[ix+3] & 0xc0) != 0x80))
11063 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011064 codepoint = (utf[ix] & 0x7) << 18;
11065 codepoint |= (utf[ix+1] & 0x3f) << 12;
11066 codepoint |= (utf[ix+2] & 0x3f) << 6;
11067 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011068 if (!xmlIsCharQ(codepoint))
11069 return(-ix);
11070 ix += 4;
11071 } else /* unknown encoding */
11072 return(-ix);
11073 }
11074 return(ix);
11075}
11076
11077/**
Owen Taylor3473f882001-02-23 17:55:21 +000011078 * xmlParseTryOrFinish:
11079 * @ctxt: an XML parser context
11080 * @terminate: last chunk indicator
11081 *
11082 * Try to progress on parsing
11083 *
11084 * Returns zero if no parsing was possible
11085 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011086static int
Owen Taylor3473f882001-02-23 17:55:21 +000011087xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11088 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011089 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011090 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011091 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011092
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011093 if (ctxt->input == NULL)
11094 return(0);
11095
Owen Taylor3473f882001-02-23 17:55:21 +000011096#ifdef DEBUG_PUSH
11097 switch (ctxt->instate) {
11098 case XML_PARSER_EOF:
11099 xmlGenericError(xmlGenericErrorContext,
11100 "PP: try EOF\n"); break;
11101 case XML_PARSER_START:
11102 xmlGenericError(xmlGenericErrorContext,
11103 "PP: try START\n"); break;
11104 case XML_PARSER_MISC:
11105 xmlGenericError(xmlGenericErrorContext,
11106 "PP: try MISC\n");break;
11107 case XML_PARSER_COMMENT:
11108 xmlGenericError(xmlGenericErrorContext,
11109 "PP: try COMMENT\n");break;
11110 case XML_PARSER_PROLOG:
11111 xmlGenericError(xmlGenericErrorContext,
11112 "PP: try PROLOG\n");break;
11113 case XML_PARSER_START_TAG:
11114 xmlGenericError(xmlGenericErrorContext,
11115 "PP: try START_TAG\n");break;
11116 case XML_PARSER_CONTENT:
11117 xmlGenericError(xmlGenericErrorContext,
11118 "PP: try CONTENT\n");break;
11119 case XML_PARSER_CDATA_SECTION:
11120 xmlGenericError(xmlGenericErrorContext,
11121 "PP: try CDATA_SECTION\n");break;
11122 case XML_PARSER_END_TAG:
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: try END_TAG\n");break;
11125 case XML_PARSER_ENTITY_DECL:
11126 xmlGenericError(xmlGenericErrorContext,
11127 "PP: try ENTITY_DECL\n");break;
11128 case XML_PARSER_ENTITY_VALUE:
11129 xmlGenericError(xmlGenericErrorContext,
11130 "PP: try ENTITY_VALUE\n");break;
11131 case XML_PARSER_ATTRIBUTE_VALUE:
11132 xmlGenericError(xmlGenericErrorContext,
11133 "PP: try ATTRIBUTE_VALUE\n");break;
11134 case XML_PARSER_DTD:
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: try DTD\n");break;
11137 case XML_PARSER_EPILOG:
11138 xmlGenericError(xmlGenericErrorContext,
11139 "PP: try EPILOG\n");break;
11140 case XML_PARSER_PI:
11141 xmlGenericError(xmlGenericErrorContext,
11142 "PP: try PI\n");break;
11143 case XML_PARSER_IGNORE:
11144 xmlGenericError(xmlGenericErrorContext,
11145 "PP: try IGNORE\n");break;
11146 }
11147#endif
11148
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011149 if ((ctxt->input != NULL) &&
11150 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011151 xmlSHRINK(ctxt);
11152 ctxt->checkIndex = 0;
11153 }
11154 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011155
Daniel Veillarde50ba812013-04-11 15:54:51 +080011156 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011157 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011158 return(0);
11159
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011160 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011161 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011162 avail = ctxt->input->length -
11163 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011164 else {
11165 /*
11166 * If we are operating on converted input, try to flush
11167 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011168 * buffer. But do not do this in document start where
11169 * encoding="..." may not have been read and we work on a
11170 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011171 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011172 if ((ctxt->instate != XML_PARSER_START) &&
11173 (ctxt->input->buf->raw != NULL) &&
11174 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011175 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11176 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011177 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011178
11179 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011180 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11181 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011182 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011183 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011184 (ctxt->input->cur - ctxt->input->base);
11185 }
Owen Taylor3473f882001-02-23 17:55:21 +000011186 if (avail < 1)
11187 goto done;
11188 switch (ctxt->instate) {
11189 case XML_PARSER_EOF:
11190 /*
11191 * Document parsing is done !
11192 */
11193 goto done;
11194 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011195 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11196 xmlChar start[4];
11197 xmlCharEncoding enc;
11198
11199 /*
11200 * Very first chars read from the document flow.
11201 */
11202 if (avail < 4)
11203 goto done;
11204
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011205 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011206 * Get the 4 first bytes and decode the charset
11207 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011208 * plug some encoding conversion routines,
11209 * else xmlSwitchEncoding will set to (default)
11210 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011211 */
11212 start[0] = RAW;
11213 start[1] = NXT(1);
11214 start[2] = NXT(2);
11215 start[3] = NXT(3);
11216 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011217 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011218 break;
11219 }
Owen Taylor3473f882001-02-23 17:55:21 +000011220
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011221 if (avail < 2)
11222 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011223 cur = ctxt->input->cur[0];
11224 next = ctxt->input->cur[1];
11225 if (cur == 0) {
11226 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11227 ctxt->sax->setDocumentLocator(ctxt->userData,
11228 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011229 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011230 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011231#ifdef DEBUG_PUSH
11232 xmlGenericError(xmlGenericErrorContext,
11233 "PP: entering EOF\n");
11234#endif
11235 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11236 ctxt->sax->endDocument(ctxt->userData);
11237 goto done;
11238 }
11239 if ((cur == '<') && (next == '?')) {
11240 /* PI or XML decl */
11241 if (avail < 5) return(ret);
11242 if ((!terminate) &&
11243 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11244 return(ret);
11245 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11246 ctxt->sax->setDocumentLocator(ctxt->userData,
11247 &xmlDefaultSAXLocator);
11248 if ((ctxt->input->cur[2] == 'x') &&
11249 (ctxt->input->cur[3] == 'm') &&
11250 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011251 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011252 ret += 5;
11253#ifdef DEBUG_PUSH
11254 xmlGenericError(xmlGenericErrorContext,
11255 "PP: Parsing XML Decl\n");
11256#endif
11257 xmlParseXMLDecl(ctxt);
11258 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11259 /*
11260 * The XML REC instructs us to stop parsing right
11261 * here
11262 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011263 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011264 return(0);
11265 }
11266 ctxt->standalone = ctxt->input->standalone;
11267 if ((ctxt->encoding == NULL) &&
11268 (ctxt->input->encoding != NULL))
11269 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11270 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11271 (!ctxt->disableSAX))
11272 ctxt->sax->startDocument(ctxt->userData);
11273 ctxt->instate = XML_PARSER_MISC;
11274#ifdef DEBUG_PUSH
11275 xmlGenericError(xmlGenericErrorContext,
11276 "PP: entering MISC\n");
11277#endif
11278 } else {
11279 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11280 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11281 (!ctxt->disableSAX))
11282 ctxt->sax->startDocument(ctxt->userData);
11283 ctxt->instate = XML_PARSER_MISC;
11284#ifdef DEBUG_PUSH
11285 xmlGenericError(xmlGenericErrorContext,
11286 "PP: entering MISC\n");
11287#endif
11288 }
11289 } else {
11290 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11291 ctxt->sax->setDocumentLocator(ctxt->userData,
11292 &xmlDefaultSAXLocator);
11293 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011294 if (ctxt->version == NULL) {
11295 xmlErrMemory(ctxt, NULL);
11296 break;
11297 }
Owen Taylor3473f882001-02-23 17:55:21 +000011298 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11299 (!ctxt->disableSAX))
11300 ctxt->sax->startDocument(ctxt->userData);
11301 ctxt->instate = XML_PARSER_MISC;
11302#ifdef DEBUG_PUSH
11303 xmlGenericError(xmlGenericErrorContext,
11304 "PP: entering MISC\n");
11305#endif
11306 }
11307 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011308 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011309 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011310 const xmlChar *prefix = NULL;
11311 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011312 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011313
11314 if ((avail < 2) && (ctxt->inputNr == 1))
11315 goto done;
11316 cur = ctxt->input->cur[0];
11317 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011318 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011319 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011320 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11321 ctxt->sax->endDocument(ctxt->userData);
11322 goto done;
11323 }
11324 if (!terminate) {
11325 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011326 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011327 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011328 goto done;
11329 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11330 goto done;
11331 }
11332 }
11333 if (ctxt->spaceNr == 0)
11334 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011335 else if (*ctxt->space == -2)
11336 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011337 else
11338 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011339#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011340 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011341#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011342 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011343#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011344 else
11345 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011346#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011347 if (ctxt->instate == XML_PARSER_EOF)
11348 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011349 if (name == NULL) {
11350 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011351 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011352 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11353 ctxt->sax->endDocument(ctxt->userData);
11354 goto done;
11355 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011356#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011357 /*
11358 * [ VC: Root Element Type ]
11359 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011360 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011361 */
11362 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11363 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11364 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011365#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011366
11367 /*
11368 * Check for an Empty Element.
11369 */
11370 if ((RAW == '/') && (NXT(1) == '>')) {
11371 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011372
11373 if (ctxt->sax2) {
11374 if ((ctxt->sax != NULL) &&
11375 (ctxt->sax->endElementNs != NULL) &&
11376 (!ctxt->disableSAX))
11377 ctxt->sax->endElementNs(ctxt->userData, name,
11378 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011379 if (ctxt->nsNr - nsNr > 0)
11380 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011381#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011382 } else {
11383 if ((ctxt->sax != NULL) &&
11384 (ctxt->sax->endElement != NULL) &&
11385 (!ctxt->disableSAX))
11386 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011387#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011388 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011389 if (ctxt->instate == XML_PARSER_EOF)
11390 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011391 spacePop(ctxt);
11392 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011393 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011394 } else {
11395 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011396 }
Daniel Veillard65686452012-07-19 18:25:01 +080011397 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011398 break;
11399 }
11400 if (RAW == '>') {
11401 NEXT;
11402 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011403 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011404 "Couldn't find end of Start Tag %s\n",
11405 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011406 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011407 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011408 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011409 if (ctxt->sax2)
11410 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011411#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011412 else
11413 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011414#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011415
Daniel Veillarda880b122003-04-21 21:36:41 +000011416 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011417 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011418 break;
11419 }
11420 case XML_PARSER_CONTENT: {
11421 const xmlChar *test;
11422 unsigned int cons;
11423 if ((avail < 2) && (ctxt->inputNr == 1))
11424 goto done;
11425 cur = ctxt->input->cur[0];
11426 next = ctxt->input->cur[1];
11427
11428 test = CUR_PTR;
11429 cons = ctxt->input->consumed;
11430 if ((cur == '<') && (next == '/')) {
11431 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011432 break;
11433 } else if ((cur == '<') && (next == '?')) {
11434 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011435 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11436 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011437 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011438 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011439 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011440 ctxt->instate = XML_PARSER_CONTENT;
11441 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011442 } else if ((cur == '<') && (next != '!')) {
11443 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011444 break;
11445 } else if ((cur == '<') && (next == '!') &&
11446 (ctxt->input->cur[2] == '-') &&
11447 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011448 int term;
11449
11450 if (avail < 4)
11451 goto done;
11452 ctxt->input->cur += 4;
11453 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11454 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011455 if ((!terminate) && (term < 0)) {
11456 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011457 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011458 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011459 xmlParseComment(ctxt);
11460 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011461 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011462 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11463 (ctxt->input->cur[2] == '[') &&
11464 (ctxt->input->cur[3] == 'C') &&
11465 (ctxt->input->cur[4] == 'D') &&
11466 (ctxt->input->cur[5] == 'A') &&
11467 (ctxt->input->cur[6] == 'T') &&
11468 (ctxt->input->cur[7] == 'A') &&
11469 (ctxt->input->cur[8] == '[')) {
11470 SKIP(9);
11471 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011472 break;
11473 } else if ((cur == '<') && (next == '!') &&
11474 (avail < 9)) {
11475 goto done;
11476 } else if (cur == '&') {
11477 if ((!terminate) &&
11478 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11479 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011480 xmlParseReference(ctxt);
11481 } else {
11482 /* TODO Avoid the extra copy, handle directly !!! */
11483 /*
11484 * Goal of the following test is:
11485 * - minimize calls to the SAX 'character' callback
11486 * when they are mergeable
11487 * - handle a problem for isBlank when we only parse
11488 * a sequence of blank chars and the next one is
11489 * not available to check against '<' presence.
11490 * - tries to homogenize the differences in SAX
11491 * callbacks between the push and pull versions
11492 * of the parser.
11493 */
11494 if ((ctxt->inputNr == 1) &&
11495 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11496 if (!terminate) {
11497 if (ctxt->progressive) {
11498 if ((lastlt == NULL) ||
11499 (ctxt->input->cur > lastlt))
11500 goto done;
11501 } else if (xmlParseLookupSequence(ctxt,
11502 '<', 0, 0) < 0) {
11503 goto done;
11504 }
11505 }
11506 }
11507 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011508 xmlParseCharData(ctxt, 0);
11509 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011510 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011511 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11512 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011513 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011514 break;
11515 }
11516 break;
11517 }
11518 case XML_PARSER_END_TAG:
11519 if (avail < 2)
11520 goto done;
11521 if (!terminate) {
11522 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011523 /* > can be found unescaped in attribute values */
11524 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011525 goto done;
11526 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11527 goto done;
11528 }
11529 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011530 if (ctxt->sax2) {
11531 xmlParseEndTag2(ctxt,
Nick Wellnhoferd422b952017-10-09 13:37:42 +020011532 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11533 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11534 (int) (ptrdiff_t)
11535 ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011536 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011537 }
11538#ifdef LIBXML_SAX1_ENABLED
11539 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011540 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011541#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011542 if (ctxt->instate == XML_PARSER_EOF) {
11543 /* Nothing */
11544 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011545 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011546 } else {
11547 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011548 }
11549 break;
11550 case XML_PARSER_CDATA_SECTION: {
11551 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011552 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011553 * cdataBlock merge back contiguous callbacks.
11554 */
11555 int base;
11556
11557 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11558 if (base < 0) {
11559 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011560 int tmp;
11561
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011562 tmp = xmlCheckCdataPush(ctxt->input->cur,
David Kilzer4f8606c2016-01-05 13:38:09 -080011563 XML_PARSER_BIG_BUFFER_SIZE, 0);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011564 if (tmp < 0) {
11565 tmp = -tmp;
11566 ctxt->input->cur += tmp;
11567 goto encoding_error;
11568 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011569 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11570 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011571 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011572 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011573 else if (ctxt->sax->characters != NULL)
11574 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011575 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011576 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011577 if (ctxt->instate == XML_PARSER_EOF)
11578 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011579 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011580 ctxt->checkIndex = 0;
11581 }
11582 goto done;
11583 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011584 int tmp;
11585
David Kilzer4f8606c2016-01-05 13:38:09 -080011586 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011587 if ((tmp < 0) || (tmp != base)) {
11588 tmp = -tmp;
11589 ctxt->input->cur += tmp;
11590 goto encoding_error;
11591 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011592 if ((ctxt->sax != NULL) && (base == 0) &&
11593 (ctxt->sax->cdataBlock != NULL) &&
11594 (!ctxt->disableSAX)) {
11595 /*
11596 * Special case to provide identical behaviour
11597 * between pull and push parsers on empty CDATA
11598 * sections
11599 */
11600 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11601 (!strncmp((const char *)&ctxt->input->cur[-9],
11602 "<![CDATA[", 9)))
11603 ctxt->sax->cdataBlock(ctxt->userData,
11604 BAD_CAST "", 0);
11605 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011606 (!ctxt->disableSAX)) {
11607 if (ctxt->sax->cdataBlock != NULL)
11608 ctxt->sax->cdataBlock(ctxt->userData,
11609 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011610 else if (ctxt->sax->characters != NULL)
11611 ctxt->sax->characters(ctxt->userData,
11612 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011613 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011614 if (ctxt->instate == XML_PARSER_EOF)
11615 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011616 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011617 ctxt->checkIndex = 0;
11618 ctxt->instate = XML_PARSER_CONTENT;
11619#ifdef DEBUG_PUSH
11620 xmlGenericError(xmlGenericErrorContext,
11621 "PP: entering CONTENT\n");
11622#endif
11623 }
11624 break;
11625 }
Owen Taylor3473f882001-02-23 17:55:21 +000011626 case XML_PARSER_MISC:
11627 SKIP_BLANKS;
11628 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011629 avail = ctxt->input->length -
11630 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011631 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011632 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011633 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011634 if (avail < 2)
11635 goto done;
11636 cur = ctxt->input->cur[0];
11637 next = ctxt->input->cur[1];
11638 if ((cur == '<') && (next == '?')) {
11639 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011640 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11641 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011642 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011643 }
Owen Taylor3473f882001-02-23 17:55:21 +000011644#ifdef DEBUG_PUSH
11645 xmlGenericError(xmlGenericErrorContext,
11646 "PP: Parsing PI\n");
11647#endif
11648 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011649 if (ctxt->instate == XML_PARSER_EOF)
11650 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011651 ctxt->instate = XML_PARSER_MISC;
11652 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011653 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011654 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011655 (ctxt->input->cur[2] == '-') &&
11656 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011657 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011658 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11659 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011660 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011661 }
Owen Taylor3473f882001-02-23 17:55:21 +000011662#ifdef DEBUG_PUSH
11663 xmlGenericError(xmlGenericErrorContext,
11664 "PP: Parsing Comment\n");
11665#endif
11666 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011667 if (ctxt->instate == XML_PARSER_EOF)
11668 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011669 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011670 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011671 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011672 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011673 (ctxt->input->cur[2] == 'D') &&
11674 (ctxt->input->cur[3] == 'O') &&
11675 (ctxt->input->cur[4] == 'C') &&
11676 (ctxt->input->cur[5] == 'T') &&
11677 (ctxt->input->cur[6] == 'Y') &&
11678 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011679 (ctxt->input->cur[8] == 'E')) {
11680 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011681 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11682 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011683 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011684 }
Owen Taylor3473f882001-02-23 17:55:21 +000011685#ifdef DEBUG_PUSH
11686 xmlGenericError(xmlGenericErrorContext,
11687 "PP: Parsing internal subset\n");
11688#endif
11689 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011690 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011691 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011692 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011693 if (ctxt->instate == XML_PARSER_EOF)
11694 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011695 if (RAW == '[') {
11696 ctxt->instate = XML_PARSER_DTD;
11697#ifdef DEBUG_PUSH
11698 xmlGenericError(xmlGenericErrorContext,
11699 "PP: entering DTD\n");
11700#endif
11701 } else {
11702 /*
11703 * Create and update the external subset.
11704 */
11705 ctxt->inSubset = 2;
11706 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11707 (ctxt->sax->externalSubset != NULL))
11708 ctxt->sax->externalSubset(ctxt->userData,
11709 ctxt->intSubName, ctxt->extSubSystem,
11710 ctxt->extSubURI);
11711 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011712 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011713 ctxt->instate = XML_PARSER_PROLOG;
11714#ifdef DEBUG_PUSH
11715 xmlGenericError(xmlGenericErrorContext,
11716 "PP: entering PROLOG\n");
11717#endif
11718 }
11719 } else if ((cur == '<') && (next == '!') &&
11720 (avail < 9)) {
11721 goto done;
11722 } else {
11723 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011724 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011725 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011726#ifdef DEBUG_PUSH
11727 xmlGenericError(xmlGenericErrorContext,
11728 "PP: entering START_TAG\n");
11729#endif
11730 }
11731 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011732 case XML_PARSER_PROLOG:
11733 SKIP_BLANKS;
11734 if (ctxt->input->buf == NULL)
11735 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11736 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011737 avail = xmlBufUse(ctxt->input->buf->buffer) -
11738 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011739 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011740 goto done;
11741 cur = ctxt->input->cur[0];
11742 next = ctxt->input->cur[1];
11743 if ((cur == '<') && (next == '?')) {
11744 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011745 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11746 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011747 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011748 }
Owen Taylor3473f882001-02-23 17:55:21 +000011749#ifdef DEBUG_PUSH
11750 xmlGenericError(xmlGenericErrorContext,
11751 "PP: Parsing PI\n");
11752#endif
11753 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011754 if (ctxt->instate == XML_PARSER_EOF)
11755 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011756 ctxt->instate = XML_PARSER_PROLOG;
11757 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011758 } else if ((cur == '<') && (next == '!') &&
11759 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11760 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011761 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11762 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011763 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011764 }
Owen Taylor3473f882001-02-23 17:55:21 +000011765#ifdef DEBUG_PUSH
11766 xmlGenericError(xmlGenericErrorContext,
11767 "PP: Parsing Comment\n");
11768#endif
11769 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011770 if (ctxt->instate == XML_PARSER_EOF)
11771 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011772 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011773 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011774 } else if ((cur == '<') && (next == '!') &&
11775 (avail < 4)) {
11776 goto done;
11777 } else {
11778 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011779 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011780 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011781 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011782#ifdef DEBUG_PUSH
11783 xmlGenericError(xmlGenericErrorContext,
11784 "PP: entering START_TAG\n");
11785#endif
11786 }
11787 break;
11788 case XML_PARSER_EPILOG:
11789 SKIP_BLANKS;
11790 if (ctxt->input->buf == NULL)
11791 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11792 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011793 avail = xmlBufUse(ctxt->input->buf->buffer) -
11794 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011795 if (avail < 2)
11796 goto done;
11797 cur = ctxt->input->cur[0];
11798 next = ctxt->input->cur[1];
11799 if ((cur == '<') && (next == '?')) {
11800 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011801 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11802 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011803 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011804 }
Owen Taylor3473f882001-02-23 17:55:21 +000011805#ifdef DEBUG_PUSH
11806 xmlGenericError(xmlGenericErrorContext,
11807 "PP: Parsing PI\n");
11808#endif
11809 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011810 if (ctxt->instate == XML_PARSER_EOF)
11811 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011812 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011813 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011814 } else if ((cur == '<') && (next == '!') &&
11815 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11816 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011817 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11818 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011819 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011820 }
Owen Taylor3473f882001-02-23 17:55:21 +000011821#ifdef DEBUG_PUSH
11822 xmlGenericError(xmlGenericErrorContext,
11823 "PP: Parsing Comment\n");
11824#endif
11825 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011826 if (ctxt->instate == XML_PARSER_EOF)
11827 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011828 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011829 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011830 } else if ((cur == '<') && (next == '!') &&
11831 (avail < 4)) {
11832 goto done;
11833 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011834 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011835 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011836#ifdef DEBUG_PUSH
11837 xmlGenericError(xmlGenericErrorContext,
11838 "PP: entering EOF\n");
11839#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011840 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011841 ctxt->sax->endDocument(ctxt->userData);
11842 goto done;
11843 }
11844 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011845 case XML_PARSER_DTD: {
11846 /*
11847 * Sorry but progressive parsing of the internal subset
11848 * is not expected to be supported. We first check that
11849 * the full content of the internal subset is available and
11850 * the parsing is launched only at that point.
11851 * Internal subset ends up with "']' S? '>'" in an unescaped
11852 * section and not in a ']]>' sequence which are conditional
11853 * sections (whoever argued to keep that crap in XML deserve
11854 * a place in hell !).
11855 */
11856 int base, i;
11857 xmlChar *buf;
11858 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011859 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011860
11861 base = ctxt->input->cur - ctxt->input->base;
11862 if (base < 0) return(0);
11863 if (ctxt->checkIndex > base)
11864 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011865 buf = xmlBufContent(ctxt->input->buf->buffer);
11866 use = xmlBufUse(ctxt->input->buf->buffer);
11867 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011868 if (quote != 0) {
11869 if (buf[base] == quote)
11870 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011871 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011872 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011873 if ((quote == 0) && (buf[base] == '<')) {
11874 int found = 0;
11875 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011876 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011877 (buf[base + 1] == '!') &&
11878 (buf[base + 2] == '-') &&
11879 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011880 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011881 if ((buf[base] == '-') &&
11882 (buf[base + 1] == '-') &&
11883 (buf[base + 2] == '>')) {
11884 found = 1;
11885 base += 2;
11886 break;
11887 }
11888 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011889 if (!found) {
11890#if 0
11891 fprintf(stderr, "unfinished comment\n");
11892#endif
11893 break; /* for */
11894 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011895 continue;
11896 }
11897 }
Owen Taylor3473f882001-02-23 17:55:21 +000011898 if (buf[base] == '"') {
11899 quote = '"';
11900 continue;
11901 }
11902 if (buf[base] == '\'') {
11903 quote = '\'';
11904 continue;
11905 }
11906 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011907#if 0
11908 fprintf(stderr, "%c%c%c%c: ", buf[base],
11909 buf[base + 1], buf[base + 2], buf[base + 3]);
11910#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011911 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011912 break;
11913 if (buf[base + 1] == ']') {
11914 /* conditional crap, skip both ']' ! */
11915 base++;
11916 continue;
11917 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011918 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011919 if (buf[base + i] == '>') {
11920#if 0
11921 fprintf(stderr, "found\n");
11922#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011923 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011924 }
11925 if (!IS_BLANK_CH(buf[base + i])) {
11926#if 0
11927 fprintf(stderr, "not found\n");
11928#endif
11929 goto not_end_of_int_subset;
11930 }
Owen Taylor3473f882001-02-23 17:55:21 +000011931 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011932#if 0
11933 fprintf(stderr, "end of stream\n");
11934#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011935 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011936
Owen Taylor3473f882001-02-23 17:55:21 +000011937 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011938not_end_of_int_subset:
11939 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011940 }
11941 /*
11942 * We didn't found the end of the Internal subset
11943 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011944 if (quote == 0)
11945 ctxt->checkIndex = base;
11946 else
11947 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011948#ifdef DEBUG_PUSH
11949 if (next == 0)
11950 xmlGenericError(xmlGenericErrorContext,
11951 "PP: lookup of int subset end filed\n");
11952#endif
11953 goto done;
11954
11955found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011956 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011957 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011958 if (ctxt->instate == XML_PARSER_EOF)
11959 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011960 ctxt->inSubset = 2;
11961 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11962 (ctxt->sax->externalSubset != NULL))
11963 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11964 ctxt->extSubSystem, ctxt->extSubURI);
11965 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011966 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011967 if (ctxt->instate == XML_PARSER_EOF)
11968 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011969 ctxt->instate = XML_PARSER_PROLOG;
11970 ctxt->checkIndex = 0;
11971#ifdef DEBUG_PUSH
11972 xmlGenericError(xmlGenericErrorContext,
11973 "PP: entering PROLOG\n");
11974#endif
11975 break;
11976 }
11977 case XML_PARSER_COMMENT:
11978 xmlGenericError(xmlGenericErrorContext,
11979 "PP: internal error, state == COMMENT\n");
11980 ctxt->instate = XML_PARSER_CONTENT;
11981#ifdef DEBUG_PUSH
11982 xmlGenericError(xmlGenericErrorContext,
11983 "PP: entering CONTENT\n");
11984#endif
11985 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011986 case XML_PARSER_IGNORE:
11987 xmlGenericError(xmlGenericErrorContext,
11988 "PP: internal error, state == IGNORE");
11989 ctxt->instate = XML_PARSER_DTD;
11990#ifdef DEBUG_PUSH
11991 xmlGenericError(xmlGenericErrorContext,
11992 "PP: entering DTD\n");
11993#endif
11994 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011995 case XML_PARSER_PI:
11996 xmlGenericError(xmlGenericErrorContext,
11997 "PP: internal error, state == PI\n");
11998 ctxt->instate = XML_PARSER_CONTENT;
11999#ifdef DEBUG_PUSH
12000 xmlGenericError(xmlGenericErrorContext,
12001 "PP: entering CONTENT\n");
12002#endif
12003 break;
12004 case XML_PARSER_ENTITY_DECL:
12005 xmlGenericError(xmlGenericErrorContext,
12006 "PP: internal error, state == ENTITY_DECL\n");
12007 ctxt->instate = XML_PARSER_DTD;
12008#ifdef DEBUG_PUSH
12009 xmlGenericError(xmlGenericErrorContext,
12010 "PP: entering DTD\n");
12011#endif
12012 break;
12013 case XML_PARSER_ENTITY_VALUE:
12014 xmlGenericError(xmlGenericErrorContext,
12015 "PP: internal error, state == ENTITY_VALUE\n");
12016 ctxt->instate = XML_PARSER_CONTENT;
12017#ifdef DEBUG_PUSH
12018 xmlGenericError(xmlGenericErrorContext,
12019 "PP: entering DTD\n");
12020#endif
12021 break;
12022 case XML_PARSER_ATTRIBUTE_VALUE:
12023 xmlGenericError(xmlGenericErrorContext,
12024 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12025 ctxt->instate = XML_PARSER_START_TAG;
12026#ifdef DEBUG_PUSH
12027 xmlGenericError(xmlGenericErrorContext,
12028 "PP: entering START_TAG\n");
12029#endif
12030 break;
12031 case XML_PARSER_SYSTEM_LITERAL:
12032 xmlGenericError(xmlGenericErrorContext,
12033 "PP: internal error, state == SYSTEM_LITERAL\n");
12034 ctxt->instate = XML_PARSER_START_TAG;
12035#ifdef DEBUG_PUSH
12036 xmlGenericError(xmlGenericErrorContext,
12037 "PP: entering START_TAG\n");
12038#endif
12039 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012040 case XML_PARSER_PUBLIC_LITERAL:
12041 xmlGenericError(xmlGenericErrorContext,
12042 "PP: internal error, state == PUBLIC_LITERAL\n");
12043 ctxt->instate = XML_PARSER_START_TAG;
12044#ifdef DEBUG_PUSH
12045 xmlGenericError(xmlGenericErrorContext,
12046 "PP: entering START_TAG\n");
12047#endif
12048 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012049 }
12050 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012051done:
Owen Taylor3473f882001-02-23 17:55:21 +000012052#ifdef DEBUG_PUSH
12053 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12054#endif
12055 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012056encoding_error:
12057 {
12058 char buffer[150];
12059
12060 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12061 ctxt->input->cur[0], ctxt->input->cur[1],
12062 ctxt->input->cur[2], ctxt->input->cur[3]);
12063 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12064 "Input is not proper UTF-8, indicate encoding !\n%s",
12065 BAD_CAST buffer, NULL);
12066 }
12067 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012068}
12069
12070/**
Daniel Veillard65686452012-07-19 18:25:01 +080012071 * xmlParseCheckTransition:
12072 * @ctxt: an XML parser context
12073 * @chunk: a char array
12074 * @size: the size in byte of the chunk
12075 *
12076 * Check depending on the current parser state if the chunk given must be
12077 * processed immediately or one need more data to advance on parsing.
12078 *
12079 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12080 */
12081static int
12082xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12083 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12084 return(-1);
12085 if (ctxt->instate == XML_PARSER_START_TAG) {
12086 if (memchr(chunk, '>', size) != NULL)
12087 return(1);
12088 return(0);
12089 }
12090 if (ctxt->progressive == XML_PARSER_COMMENT) {
12091 if (memchr(chunk, '>', size) != NULL)
12092 return(1);
12093 return(0);
12094 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012095 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12096 if (memchr(chunk, '>', size) != NULL)
12097 return(1);
12098 return(0);
12099 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012100 if (ctxt->progressive == XML_PARSER_PI) {
12101 if (memchr(chunk, '>', size) != NULL)
12102 return(1);
12103 return(0);
12104 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012105 if (ctxt->instate == XML_PARSER_END_TAG) {
12106 if (memchr(chunk, '>', size) != NULL)
12107 return(1);
12108 return(0);
12109 }
12110 if ((ctxt->progressive == XML_PARSER_DTD) ||
12111 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012112 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012113 return(1);
12114 return(0);
12115 }
Daniel Veillard65686452012-07-19 18:25:01 +080012116 return(1);
12117}
12118
12119/**
Owen Taylor3473f882001-02-23 17:55:21 +000012120 * xmlParseChunk:
12121 * @ctxt: an XML parser context
12122 * @chunk: an char array
12123 * @size: the size in byte of the chunk
12124 * @terminate: last chunk indicator
12125 *
12126 * Parse a Chunk of memory
12127 *
12128 * Returns zero if no error, the xmlParserErrors otherwise.
12129 */
12130int
12131xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12132 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012133 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012134 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012135 size_t old_avail = 0;
12136 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012137
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012138 if (ctxt == NULL)
12139 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012140 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012141 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012142 if (ctxt->instate == XML_PARSER_EOF)
12143 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012144 if (ctxt->instate == XML_PARSER_START)
12145 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012146 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12147 (chunk[size - 1] == '\r')) {
12148 end_in_lf = 1;
12149 size--;
12150 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012151
12152xmldecl_done:
12153
Owen Taylor3473f882001-02-23 17:55:21 +000012154 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12155 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012156 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12157 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012158 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012159
Daniel Veillard65686452012-07-19 18:25:01 +080012160 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012161 /*
12162 * Specific handling if we autodetected an encoding, we should not
12163 * push more than the first line ... which depend on the encoding
12164 * And only push the rest once the final encoding was detected
12165 */
12166 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12167 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012168 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012169
12170 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12171 BAD_CAST "UTF-16")) ||
12172 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12173 BAD_CAST "UTF16")))
12174 len = 90;
12175 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12176 BAD_CAST "UCS-4")) ||
12177 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12178 BAD_CAST "UCS4")))
12179 len = 180;
12180
12181 if (ctxt->input->buf->rawconsumed < len)
12182 len -= ctxt->input->buf->rawconsumed;
12183
Raul Hudeaba9716a2010-03-15 10:13:29 +010012184 /*
12185 * Change size for reading the initial declaration only
12186 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12187 * will blindly copy extra bytes from memory.
12188 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012189 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012190 remain = size - len;
12191 size = len;
12192 } else {
12193 remain = 0;
12194 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012195 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012196 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012197 if (res < 0) {
12198 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012199 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012200 return (XML_PARSER_EOF);
12201 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012202 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012203#ifdef DEBUG_PUSH
12204 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12205#endif
12206
Owen Taylor3473f882001-02-23 17:55:21 +000012207 } else if (ctxt->instate != XML_PARSER_EOF) {
12208 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12209 xmlParserInputBufferPtr in = ctxt->input->buf;
12210 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12211 (in->raw != NULL)) {
12212 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012213 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12214 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012215
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012216 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012217 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012218 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012219 xmlGenericError(xmlGenericErrorContext,
12220 "xmlParseChunk: encoder error\n");
Nick Wellnhoferab362ab2018-01-22 15:40:05 +010012221 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012222 return(XML_ERR_INVALID_ENCODING);
12223 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012224 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012225 }
12226 }
12227 }
Daniel Veillard65686452012-07-19 18:25:01 +080012228 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012229 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012230 } else {
12231 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12232 avail = xmlBufUse(ctxt->input->buf->buffer);
12233 /*
12234 * Depending on the current state it may not be such
12235 * a good idea to try parsing if there is nothing in the chunk
12236 * which would be worth doing a parser state transition and we
12237 * need to wait for more data
12238 */
12239 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12240 (old_avail == 0) || (avail == 0) ||
12241 (xmlParseCheckTransition(ctxt,
12242 (const char *)&ctxt->input->base[old_avail],
12243 avail - old_avail)))
12244 xmlParseTryOrFinish(ctxt, terminate);
12245 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012246 if (ctxt->instate == XML_PARSER_EOF)
12247 return(ctxt->errNo);
12248
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012249 if ((ctxt->input != NULL) &&
12250 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12251 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12252 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12253 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012254 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012255 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012256 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12257 return(ctxt->errNo);
12258
12259 if (remain != 0) {
12260 chunk += size;
12261 size = remain;
12262 remain = 0;
12263 goto xmldecl_done;
12264 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012265 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12266 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012267 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12268 ctxt->input);
12269 size_t current = ctxt->input->cur - ctxt->input->base;
12270
Daniel Veillarda617e242006-01-09 14:38:44 +000012271 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012272
12273 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12274 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012275 }
Owen Taylor3473f882001-02-23 17:55:21 +000012276 if (terminate) {
12277 /*
12278 * Check for termination
12279 */
Daniel Veillard65686452012-07-19 18:25:01 +080012280 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012281
12282 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012283 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012284 cur_avail = ctxt->input->length -
12285 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012286 else
Daniel Veillard65686452012-07-19 18:25:01 +080012287 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12288 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012289 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012290
Owen Taylor3473f882001-02-23 17:55:21 +000012291 if ((ctxt->instate != XML_PARSER_EOF) &&
12292 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012293 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012294 }
Daniel Veillard65686452012-07-19 18:25:01 +080012295 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012296 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012297 }
Owen Taylor3473f882001-02-23 17:55:21 +000012298 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012299 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012300 ctxt->sax->endDocument(ctxt->userData);
12301 }
12302 ctxt->instate = XML_PARSER_EOF;
12303 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012304 if (ctxt->wellFormed == 0)
12305 return((xmlParserErrors) ctxt->errNo);
12306 else
12307 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012308}
12309
12310/************************************************************************
12311 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012312 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012313 * *
12314 ************************************************************************/
12315
12316/**
Owen Taylor3473f882001-02-23 17:55:21 +000012317 * xmlCreatePushParserCtxt:
12318 * @sax: a SAX handler
12319 * @user_data: The user data returned on SAX callbacks
12320 * @chunk: a pointer to an array of chars
12321 * @size: number of chars in the array
12322 * @filename: an optional file name or URI
12323 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012324 * Create a parser context for using the XML parser in push mode.
12325 * If @buffer and @size are non-NULL, the data is used to detect
12326 * the encoding. The remaining characters will be parsed so they
12327 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012328 * To allow content encoding detection, @size should be >= 4
12329 * The value of @filename is used for fetching external entities
12330 * and error/warning reports.
12331 *
12332 * Returns the new parser context or NULL
12333 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012334
Owen Taylor3473f882001-02-23 17:55:21 +000012335xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012336xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012337 const char *chunk, int size, const char *filename) {
12338 xmlParserCtxtPtr ctxt;
12339 xmlParserInputPtr inputStream;
12340 xmlParserInputBufferPtr buf;
12341 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12342
12343 /*
12344 * plug some encoding conversion routines
12345 */
12346 if ((chunk != NULL) && (size >= 4))
12347 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12348
12349 buf = xmlAllocParserInputBuffer(enc);
12350 if (buf == NULL) return(NULL);
12351
12352 ctxt = xmlNewParserCtxt();
12353 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012354 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012355 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012356 return(NULL);
12357 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012358 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012359 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12360 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012361 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012362 xmlFreeParserInputBuffer(buf);
12363 xmlFreeParserCtxt(ctxt);
12364 return(NULL);
12365 }
Owen Taylor3473f882001-02-23 17:55:21 +000012366 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012367#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012368 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012369#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012370 xmlFree(ctxt->sax);
12371 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12372 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012373 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012374 xmlFreeParserInputBuffer(buf);
12375 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012376 return(NULL);
12377 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012378 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12379 if (sax->initialized == XML_SAX2_MAGIC)
12380 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12381 else
12382 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012383 if (user_data != NULL)
12384 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012385 }
Owen Taylor3473f882001-02-23 17:55:21 +000012386 if (filename == NULL) {
12387 ctxt->directory = NULL;
12388 } else {
12389 ctxt->directory = xmlParserGetDirectory(filename);
12390 }
12391
12392 inputStream = xmlNewInputStream(ctxt);
12393 if (inputStream == NULL) {
12394 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012395 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012396 return(NULL);
12397 }
12398
12399 if (filename == NULL)
12400 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012401 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012402 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012403 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012404 if (inputStream->filename == NULL) {
12405 xmlFreeParserCtxt(ctxt);
12406 xmlFreeParserInputBuffer(buf);
12407 return(NULL);
12408 }
12409 }
Owen Taylor3473f882001-02-23 17:55:21 +000012410 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012411 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012412 inputPush(ctxt, inputStream);
12413
William M. Brack3a1cd212005-02-11 14:35:54 +000012414 /*
12415 * If the caller didn't provide an initial 'chunk' for determining
12416 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12417 * that it can be automatically determined later
12418 */
12419 if ((size == 0) || (chunk == NULL)) {
12420 ctxt->charset = XML_CHAR_ENCODING_NONE;
12421 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012422 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12423 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012424
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012425 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012426
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012427 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012428#ifdef DEBUG_PUSH
12429 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12430#endif
12431 }
12432
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012433 if (enc != XML_CHAR_ENCODING_NONE) {
12434 xmlSwitchEncoding(ctxt, enc);
12435 }
12436
Owen Taylor3473f882001-02-23 17:55:21 +000012437 return(ctxt);
12438}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012439#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012440
12441/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012442 * xmlHaltParser:
12443 * @ctxt: an XML parser context
12444 *
12445 * Blocks further parser processing don't override error
12446 * for internal use
12447 */
12448static void
12449xmlHaltParser(xmlParserCtxtPtr ctxt) {
12450 if (ctxt == NULL)
12451 return;
12452 ctxt->instate = XML_PARSER_EOF;
12453 ctxt->disableSAX = 1;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012454 while (ctxt->inputNr > 1)
12455 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012456 if (ctxt->input != NULL) {
12457 /*
12458 * in case there was a specific allocation deallocate before
12459 * overriding base
12460 */
12461 if (ctxt->input->free != NULL) {
12462 ctxt->input->free((xmlChar *) ctxt->input->base);
12463 ctxt->input->free = NULL;
12464 }
12465 ctxt->input->cur = BAD_CAST"";
12466 ctxt->input->base = ctxt->input->cur;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012467 ctxt->input->end = ctxt->input->cur;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012468 }
12469}
12470
12471/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012472 * xmlStopParser:
12473 * @ctxt: an XML parser context
12474 *
12475 * Blocks further parser processing
12476 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012477void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012478xmlStopParser(xmlParserCtxtPtr ctxt) {
12479 if (ctxt == NULL)
12480 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012481 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012482 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012483}
12484
12485/**
Owen Taylor3473f882001-02-23 17:55:21 +000012486 * xmlCreateIOParserCtxt:
12487 * @sax: a SAX handler
12488 * @user_data: The user data returned on SAX callbacks
12489 * @ioread: an I/O read function
12490 * @ioclose: an I/O close function
12491 * @ioctx: an I/O handler
12492 * @enc: the charset encoding if known
12493 *
12494 * Create a parser context for using the XML parser with an existing
12495 * I/O stream
12496 *
12497 * Returns the new parser context or NULL
12498 */
12499xmlParserCtxtPtr
12500xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12501 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12502 void *ioctx, xmlCharEncoding enc) {
12503 xmlParserCtxtPtr ctxt;
12504 xmlParserInputPtr inputStream;
12505 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012506
Daniel Veillard42595322004-11-08 10:52:06 +000012507 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012508
12509 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012510 if (buf == NULL) {
12511 if (ioclose != NULL)
12512 ioclose(ioctx);
12513 return (NULL);
12514 }
Owen Taylor3473f882001-02-23 17:55:21 +000012515
12516 ctxt = xmlNewParserCtxt();
12517 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012518 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012519 return(NULL);
12520 }
12521 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012522#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012523 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012524#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012525 xmlFree(ctxt->sax);
12526 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12527 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012528 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012529 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012530 return(NULL);
12531 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012532 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12533 if (sax->initialized == XML_SAX2_MAGIC)
12534 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12535 else
12536 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012537 if (user_data != NULL)
12538 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012539 }
Owen Taylor3473f882001-02-23 17:55:21 +000012540
12541 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12542 if (inputStream == NULL) {
12543 xmlFreeParserCtxt(ctxt);
12544 return(NULL);
12545 }
12546 inputPush(ctxt, inputStream);
12547
12548 return(ctxt);
12549}
12550
Daniel Veillard4432df22003-09-28 18:58:27 +000012551#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012552/************************************************************************
12553 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012554 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012555 * *
12556 ************************************************************************/
12557
12558/**
12559 * xmlIOParseDTD:
12560 * @sax: the SAX handler block or NULL
12561 * @input: an Input Buffer
12562 * @enc: the charset encoding if known
12563 *
12564 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012565 *
Owen Taylor3473f882001-02-23 17:55:21 +000012566 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012567 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012568 */
12569
12570xmlDtdPtr
12571xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12572 xmlCharEncoding enc) {
12573 xmlDtdPtr ret = NULL;
12574 xmlParserCtxtPtr ctxt;
12575 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012576 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012577
12578 if (input == NULL)
12579 return(NULL);
12580
12581 ctxt = xmlNewParserCtxt();
12582 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012583 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012584 return(NULL);
12585 }
12586
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012587 /* We are loading a DTD */
12588 ctxt->options |= XML_PARSE_DTDLOAD;
12589
Owen Taylor3473f882001-02-23 17:55:21 +000012590 /*
12591 * Set-up the SAX context
12592 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012593 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012594 if (ctxt->sax != NULL)
12595 xmlFree(ctxt->sax);
12596 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012597 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012598 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012599 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012600
12601 /*
12602 * generate a parser input from the I/O handler
12603 */
12604
Daniel Veillard43caefb2003-12-07 19:32:22 +000012605 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012606 if (pinput == NULL) {
12607 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012608 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012609 xmlFreeParserCtxt(ctxt);
12610 return(NULL);
12611 }
12612
12613 /*
12614 * plug some encoding conversion routines here.
12615 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012616 if (xmlPushInput(ctxt, pinput) < 0) {
12617 if (sax != NULL) ctxt->sax = NULL;
12618 xmlFreeParserCtxt(ctxt);
12619 return(NULL);
12620 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012621 if (enc != XML_CHAR_ENCODING_NONE) {
12622 xmlSwitchEncoding(ctxt, enc);
12623 }
Owen Taylor3473f882001-02-23 17:55:21 +000012624
12625 pinput->filename = NULL;
12626 pinput->line = 1;
12627 pinput->col = 1;
12628 pinput->base = ctxt->input->cur;
12629 pinput->cur = ctxt->input->cur;
12630 pinput->free = NULL;
12631
12632 /*
12633 * let's parse that entity knowing it's an external subset.
12634 */
12635 ctxt->inSubset = 2;
12636 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012637 if (ctxt->myDoc == NULL) {
12638 xmlErrMemory(ctxt, "New Doc failed");
12639 return(NULL);
12640 }
12641 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012642 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12643 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012644
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012645 if ((enc == XML_CHAR_ENCODING_NONE) &&
12646 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012647 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012648 * Get the 4 first bytes and decode the charset
12649 * if enc != XML_CHAR_ENCODING_NONE
12650 * plug some encoding conversion routines.
12651 */
12652 start[0] = RAW;
12653 start[1] = NXT(1);
12654 start[2] = NXT(2);
12655 start[3] = NXT(3);
12656 enc = xmlDetectCharEncoding(start, 4);
12657 if (enc != XML_CHAR_ENCODING_NONE) {
12658 xmlSwitchEncoding(ctxt, enc);
12659 }
12660 }
12661
Owen Taylor3473f882001-02-23 17:55:21 +000012662 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12663
12664 if (ctxt->myDoc != NULL) {
12665 if (ctxt->wellFormed) {
12666 ret = ctxt->myDoc->extSubset;
12667 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012668 if (ret != NULL) {
12669 xmlNodePtr tmp;
12670
12671 ret->doc = NULL;
12672 tmp = ret->children;
12673 while (tmp != NULL) {
12674 tmp->doc = NULL;
12675 tmp = tmp->next;
12676 }
12677 }
Owen Taylor3473f882001-02-23 17:55:21 +000012678 } else {
12679 ret = NULL;
12680 }
12681 xmlFreeDoc(ctxt->myDoc);
12682 ctxt->myDoc = NULL;
12683 }
12684 if (sax != NULL) ctxt->sax = NULL;
12685 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012686
Owen Taylor3473f882001-02-23 17:55:21 +000012687 return(ret);
12688}
12689
12690/**
12691 * xmlSAXParseDTD:
12692 * @sax: the SAX handler block
12693 * @ExternalID: a NAME* containing the External ID of the DTD
12694 * @SystemID: a NAME* containing the URL to the DTD
12695 *
12696 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012697 *
Owen Taylor3473f882001-02-23 17:55:21 +000012698 * Returns the resulting xmlDtdPtr or NULL in case of error.
12699 */
12700
12701xmlDtdPtr
12702xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12703 const xmlChar *SystemID) {
12704 xmlDtdPtr ret = NULL;
12705 xmlParserCtxtPtr ctxt;
12706 xmlParserInputPtr input = NULL;
12707 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012708 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012709
12710 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12711
12712 ctxt = xmlNewParserCtxt();
12713 if (ctxt == NULL) {
12714 return(NULL);
12715 }
12716
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012717 /* We are loading a DTD */
12718 ctxt->options |= XML_PARSE_DTDLOAD;
12719
Owen Taylor3473f882001-02-23 17:55:21 +000012720 /*
12721 * Set-up the SAX context
12722 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012723 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012724 if (ctxt->sax != NULL)
12725 xmlFree(ctxt->sax);
12726 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012727 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012728 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012729
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012730 /*
12731 * Canonicalise the system ID
12732 */
12733 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012734 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012735 xmlFreeParserCtxt(ctxt);
12736 return(NULL);
12737 }
Owen Taylor3473f882001-02-23 17:55:21 +000012738
12739 /*
12740 * Ask the Entity resolver to load the damn thing
12741 */
12742
12743 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012744 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12745 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012746 if (input == NULL) {
12747 if (sax != NULL) ctxt->sax = NULL;
12748 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012749 if (systemIdCanonic != NULL)
12750 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012751 return(NULL);
12752 }
12753
12754 /*
12755 * plug some encoding conversion routines here.
12756 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012757 if (xmlPushInput(ctxt, input) < 0) {
12758 if (sax != NULL) ctxt->sax = NULL;
12759 xmlFreeParserCtxt(ctxt);
12760 if (systemIdCanonic != NULL)
12761 xmlFree(systemIdCanonic);
12762 return(NULL);
12763 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012764 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12765 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12766 xmlSwitchEncoding(ctxt, enc);
12767 }
Owen Taylor3473f882001-02-23 17:55:21 +000012768
12769 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012770 input->filename = (char *) systemIdCanonic;
12771 else
12772 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012773 input->line = 1;
12774 input->col = 1;
12775 input->base = ctxt->input->cur;
12776 input->cur = ctxt->input->cur;
12777 input->free = NULL;
12778
12779 /*
12780 * let's parse that entity knowing it's an external subset.
12781 */
12782 ctxt->inSubset = 2;
12783 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012784 if (ctxt->myDoc == NULL) {
12785 xmlErrMemory(ctxt, "New Doc failed");
12786 if (sax != NULL) ctxt->sax = NULL;
12787 xmlFreeParserCtxt(ctxt);
12788 return(NULL);
12789 }
12790 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012791 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12792 ExternalID, SystemID);
12793 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12794
12795 if (ctxt->myDoc != NULL) {
12796 if (ctxt->wellFormed) {
12797 ret = ctxt->myDoc->extSubset;
12798 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012799 if (ret != NULL) {
12800 xmlNodePtr tmp;
12801
12802 ret->doc = NULL;
12803 tmp = ret->children;
12804 while (tmp != NULL) {
12805 tmp->doc = NULL;
12806 tmp = tmp->next;
12807 }
12808 }
Owen Taylor3473f882001-02-23 17:55:21 +000012809 } else {
12810 ret = NULL;
12811 }
12812 xmlFreeDoc(ctxt->myDoc);
12813 ctxt->myDoc = NULL;
12814 }
12815 if (sax != NULL) ctxt->sax = NULL;
12816 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012817
Owen Taylor3473f882001-02-23 17:55:21 +000012818 return(ret);
12819}
12820
Daniel Veillard4432df22003-09-28 18:58:27 +000012821
Owen Taylor3473f882001-02-23 17:55:21 +000012822/**
12823 * xmlParseDTD:
12824 * @ExternalID: a NAME* containing the External ID of the DTD
12825 * @SystemID: a NAME* containing the URL to the DTD
12826 *
12827 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012828 *
Owen Taylor3473f882001-02-23 17:55:21 +000012829 * Returns the resulting xmlDtdPtr or NULL in case of error.
12830 */
12831
12832xmlDtdPtr
12833xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12834 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12835}
Daniel Veillard4432df22003-09-28 18:58:27 +000012836#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012837
12838/************************************************************************
12839 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012840 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012841 * *
12842 ************************************************************************/
12843
12844/**
Owen Taylor3473f882001-02-23 17:55:21 +000012845 * xmlParseCtxtExternalEntity:
12846 * @ctx: the existing parsing context
12847 * @URL: the URL for the entity to load
12848 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012849 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012850 *
12851 * Parse an external general entity within an existing parsing context
12852 * An external general parsed entity is well-formed if it matches the
12853 * production labeled extParsedEnt.
12854 *
12855 * [78] extParsedEnt ::= TextDecl? content
12856 *
12857 * Returns 0 if the entity is well formed, -1 in case of args problem and
12858 * the parser error code otherwise
12859 */
12860
12861int
12862xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012863 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012864 xmlParserCtxtPtr ctxt;
12865 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012866 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012867 xmlSAXHandlerPtr oldsax = NULL;
12868 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012869 xmlChar start[4];
12870 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012871
Daniel Veillardce682bc2004-11-05 17:22:25 +000012872 if (ctx == NULL) return(-1);
12873
Daniel Veillard0161e632008-08-28 15:36:32 +000012874 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12875 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012876 return(XML_ERR_ENTITY_LOOP);
12877 }
12878
Daniel Veillardcda96922001-08-21 10:56:31 +000012879 if (lst != NULL)
12880 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012881 if ((URL == NULL) && (ID == NULL))
12882 return(-1);
12883 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12884 return(-1);
12885
Rob Richards798743a2009-06-19 13:54:25 -040012886 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012887 if (ctxt == NULL) {
12888 return(-1);
12889 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012890
Owen Taylor3473f882001-02-23 17:55:21 +000012891 oldsax = ctxt->sax;
12892 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012893 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012894 newDoc = xmlNewDoc(BAD_CAST "1.0");
12895 if (newDoc == NULL) {
12896 xmlFreeParserCtxt(ctxt);
12897 return(-1);
12898 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012899 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012900 if (ctx->myDoc->dict) {
12901 newDoc->dict = ctx->myDoc->dict;
12902 xmlDictReference(newDoc->dict);
12903 }
Owen Taylor3473f882001-02-23 17:55:21 +000012904 if (ctx->myDoc != NULL) {
12905 newDoc->intSubset = ctx->myDoc->intSubset;
12906 newDoc->extSubset = ctx->myDoc->extSubset;
12907 }
12908 if (ctx->myDoc->URL != NULL) {
12909 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12910 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012911 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12912 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012913 ctxt->sax = oldsax;
12914 xmlFreeParserCtxt(ctxt);
12915 newDoc->intSubset = NULL;
12916 newDoc->extSubset = NULL;
12917 xmlFreeDoc(newDoc);
12918 return(-1);
12919 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012920 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012921 nodePush(ctxt, newDoc->children);
12922 if (ctx->myDoc == NULL) {
12923 ctxt->myDoc = newDoc;
12924 } else {
12925 ctxt->myDoc = ctx->myDoc;
12926 newDoc->children->doc = ctx->myDoc;
12927 }
12928
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012929 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012930 * Get the 4 first bytes and decode the charset
12931 * if enc != XML_CHAR_ENCODING_NONE
12932 * plug some encoding conversion routines.
12933 */
12934 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012935 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12936 start[0] = RAW;
12937 start[1] = NXT(1);
12938 start[2] = NXT(2);
12939 start[3] = NXT(3);
12940 enc = xmlDetectCharEncoding(start, 4);
12941 if (enc != XML_CHAR_ENCODING_NONE) {
12942 xmlSwitchEncoding(ctxt, enc);
12943 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012944 }
12945
Owen Taylor3473f882001-02-23 17:55:21 +000012946 /*
12947 * Parse a possible text declaration first
12948 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012949 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012950 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012951 /*
12952 * An XML-1.0 document can't reference an entity not XML-1.0
12953 */
12954 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12955 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012956 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012957 "Version mismatch between document and entity\n");
12958 }
Owen Taylor3473f882001-02-23 17:55:21 +000012959 }
12960
12961 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012962 * If the user provided its own SAX callbacks then reuse the
12963 * useData callback field, otherwise the expected setup in a
12964 * DOM builder is to have userData == ctxt
12965 */
12966 if (ctx->userData == ctx)
12967 ctxt->userData = ctxt;
12968 else
12969 ctxt->userData = ctx->userData;
12970
12971 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012972 * Doing validity checking on chunk doesn't make sense
12973 */
12974 ctxt->instate = XML_PARSER_CONTENT;
12975 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012976 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012977 ctxt->loadsubset = ctx->loadsubset;
12978 ctxt->depth = ctx->depth + 1;
12979 ctxt->replaceEntities = ctx->replaceEntities;
12980 if (ctxt->validate) {
12981 ctxt->vctxt.error = ctx->vctxt.error;
12982 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012983 } else {
12984 ctxt->vctxt.error = NULL;
12985 ctxt->vctxt.warning = NULL;
12986 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012987 ctxt->vctxt.nodeTab = NULL;
12988 ctxt->vctxt.nodeNr = 0;
12989 ctxt->vctxt.nodeMax = 0;
12990 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012991 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12992 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012993 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12994 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12995 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012996 ctxt->dictNames = ctx->dictNames;
12997 ctxt->attsDefault = ctx->attsDefault;
12998 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012999 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013000
13001 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013002
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013003 ctx->validate = ctxt->validate;
13004 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013005 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013006 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013007 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013008 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013009 }
13010 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013011 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013012 }
13013
13014 if (!ctxt->wellFormed) {
13015 if (ctxt->errNo == 0)
13016 ret = 1;
13017 else
13018 ret = ctxt->errNo;
13019 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013020 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013021 xmlNodePtr cur;
13022
13023 /*
13024 * Return the newly created nodeset after unlinking it from
13025 * they pseudo parent.
13026 */
13027 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013028 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013029 while (cur != NULL) {
13030 cur->parent = NULL;
13031 cur = cur->next;
13032 }
13033 newDoc->children->children = NULL;
13034 }
13035 ret = 0;
13036 }
13037 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013038 ctxt->dict = NULL;
13039 ctxt->attsDefault = NULL;
13040 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013041 xmlFreeParserCtxt(ctxt);
13042 newDoc->intSubset = NULL;
13043 newDoc->extSubset = NULL;
13044 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013045
Owen Taylor3473f882001-02-23 17:55:21 +000013046 return(ret);
13047}
13048
13049/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013050 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013051 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013052 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013053 * @sax: the SAX handler bloc (possibly NULL)
13054 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13055 * @depth: Used for loop detection, use 0
13056 * @URL: the URL for the entity to load
13057 * @ID: the System ID for the entity to load
13058 * @list: the return value for the set of parsed nodes
13059 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013060 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013061 *
13062 * Returns 0 if the entity is well formed, -1 in case of args problem and
13063 * the parser error code otherwise
13064 */
13065
Daniel Veillard7d515752003-09-26 19:12:37 +000013066static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013067xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13068 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013069 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013070 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013071 xmlParserCtxtPtr ctxt;
13072 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013073 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013074 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013075 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013076 xmlChar start[4];
13077 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013078
Daniel Veillard0161e632008-08-28 15:36:32 +000013079 if (((depth > 40) &&
13080 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13081 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013082 return(XML_ERR_ENTITY_LOOP);
13083 }
13084
Owen Taylor3473f882001-02-23 17:55:21 +000013085 if (list != NULL)
13086 *list = NULL;
13087 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013088 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013089 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013090 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013091
13092
Rob Richards9c0aa472009-03-26 18:10:19 +000013093 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013094 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013095 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013096 if (oldctxt != NULL) {
13097 ctxt->_private = oldctxt->_private;
13098 ctxt->loadsubset = oldctxt->loadsubset;
13099 ctxt->validate = oldctxt->validate;
13100 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013101 ctxt->record_info = oldctxt->record_info;
13102 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13103 ctxt->node_seq.length = oldctxt->node_seq.length;
13104 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013105 } else {
13106 /*
13107 * Doing validity checking on chunk without context
13108 * doesn't make sense
13109 */
13110 ctxt->_private = NULL;
13111 ctxt->validate = 0;
13112 ctxt->external = 2;
13113 ctxt->loadsubset = 0;
13114 }
Owen Taylor3473f882001-02-23 17:55:21 +000013115 if (sax != NULL) {
13116 oldsax = ctxt->sax;
13117 ctxt->sax = sax;
13118 if (user_data != NULL)
13119 ctxt->userData = user_data;
13120 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013121 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013122 newDoc = xmlNewDoc(BAD_CAST "1.0");
13123 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013124 ctxt->node_seq.maximum = 0;
13125 ctxt->node_seq.length = 0;
13126 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013127 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013128 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013129 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013130 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013131 newDoc->intSubset = doc->intSubset;
13132 newDoc->extSubset = doc->extSubset;
13133 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013134 xmlDictReference(newDoc->dict);
13135
Owen Taylor3473f882001-02-23 17:55:21 +000013136 if (doc->URL != NULL) {
13137 newDoc->URL = xmlStrdup(doc->URL);
13138 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013139 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13140 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013141 if (sax != NULL)
13142 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013143 ctxt->node_seq.maximum = 0;
13144 ctxt->node_seq.length = 0;
13145 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013146 xmlFreeParserCtxt(ctxt);
13147 newDoc->intSubset = NULL;
13148 newDoc->extSubset = NULL;
13149 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013150 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013151 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013152 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013153 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013154 ctxt->myDoc = doc;
13155 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013156
Daniel Veillard0161e632008-08-28 15:36:32 +000013157 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013158 * Get the 4 first bytes and decode the charset
13159 * if enc != XML_CHAR_ENCODING_NONE
13160 * plug some encoding conversion routines.
13161 */
13162 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013163 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13164 start[0] = RAW;
13165 start[1] = NXT(1);
13166 start[2] = NXT(2);
13167 start[3] = NXT(3);
13168 enc = xmlDetectCharEncoding(start, 4);
13169 if (enc != XML_CHAR_ENCODING_NONE) {
13170 xmlSwitchEncoding(ctxt, enc);
13171 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013172 }
13173
Owen Taylor3473f882001-02-23 17:55:21 +000013174 /*
13175 * Parse a possible text declaration first
13176 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013177 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013178 xmlParseTextDecl(ctxt);
13179 }
13180
Owen Taylor3473f882001-02-23 17:55:21 +000013181 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013182 ctxt->depth = depth;
13183
13184 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013185
Daniel Veillard561b7f82002-03-20 21:55:57 +000013186 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013187 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013188 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013189 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013190 }
13191 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013192 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013193 }
13194
13195 if (!ctxt->wellFormed) {
13196 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013197 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013198 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013199 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013200 } else {
13201 if (list != NULL) {
13202 xmlNodePtr cur;
13203
13204 /*
13205 * Return the newly created nodeset after unlinking it from
13206 * they pseudo parent.
13207 */
13208 cur = newDoc->children->children;
13209 *list = cur;
13210 while (cur != NULL) {
13211 cur->parent = NULL;
13212 cur = cur->next;
13213 }
13214 newDoc->children->children = NULL;
13215 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013216 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013217 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013218
13219 /*
13220 * Record in the parent context the number of entities replacement
13221 * done when parsing that reference.
13222 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013223 if (oldctxt != NULL)
13224 oldctxt->nbentities += ctxt->nbentities;
13225
Daniel Veillard0161e632008-08-28 15:36:32 +000013226 /*
13227 * Also record the size of the entity parsed
13228 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013229 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013230 oldctxt->sizeentities += ctxt->input->consumed;
13231 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13232 }
13233 /*
13234 * And record the last error if any
13235 */
Nick Wellnhofer3eef3f32017-06-20 16:13:57 +020013236 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
Daniel Veillard0161e632008-08-28 15:36:32 +000013237 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13238
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013239 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013240 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013241 if (oldctxt != NULL) {
13242 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13243 oldctxt->node_seq.length = ctxt->node_seq.length;
13244 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13245 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013246 ctxt->node_seq.maximum = 0;
13247 ctxt->node_seq.length = 0;
13248 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013249 xmlFreeParserCtxt(ctxt);
13250 newDoc->intSubset = NULL;
13251 newDoc->extSubset = NULL;
13252 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013253
Owen Taylor3473f882001-02-23 17:55:21 +000013254 return(ret);
13255}
13256
Daniel Veillard81273902003-09-30 00:43:48 +000013257#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013258/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013259 * xmlParseExternalEntity:
13260 * @doc: the document the chunk pertains to
13261 * @sax: the SAX handler bloc (possibly NULL)
13262 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13263 * @depth: Used for loop detection, use 0
13264 * @URL: the URL for the entity to load
13265 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013266 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013267 *
13268 * Parse an external general entity
13269 * An external general parsed entity is well-formed if it matches the
13270 * production labeled extParsedEnt.
13271 *
13272 * [78] extParsedEnt ::= TextDecl? content
13273 *
13274 * Returns 0 if the entity is well formed, -1 in case of args problem and
13275 * the parser error code otherwise
13276 */
13277
13278int
13279xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013280 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013281 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013282 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013283}
13284
13285/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013286 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013287 * @doc: the document the chunk pertains to
13288 * @sax: the SAX handler bloc (possibly NULL)
13289 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13290 * @depth: Used for loop detection, use 0
13291 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013292 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013293 *
13294 * Parse a well-balanced chunk of an XML document
13295 * called by the parser
13296 * The allowed sequence for the Well Balanced Chunk is the one defined by
13297 * the content production in the XML grammar:
13298 *
13299 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13300 *
13301 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13302 * the parser error code otherwise
13303 */
13304
13305int
13306xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013307 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013308 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13309 depth, string, lst, 0 );
13310}
Daniel Veillard81273902003-09-30 00:43:48 +000013311#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013312
13313/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013314 * xmlParseBalancedChunkMemoryInternal:
13315 * @oldctxt: the existing parsing context
13316 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13317 * @user_data: the user data field for the parser context
13318 * @lst: the return value for the set of parsed nodes
13319 *
13320 *
13321 * Parse a well-balanced chunk of an XML document
13322 * called by the parser
13323 * The allowed sequence for the Well Balanced Chunk is the one defined by
13324 * the content production in the XML grammar:
13325 *
13326 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13327 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013328 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13329 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013330 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013331 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013332 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013333 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013334static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013335xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13336 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13337 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013338 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013339 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013340 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013341 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013342 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013343 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013344 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013345#ifdef SAX2
13346 int i;
13347#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013348
Daniel Veillard0161e632008-08-28 15:36:32 +000013349 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13350 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013351 return(XML_ERR_ENTITY_LOOP);
13352 }
13353
13354
13355 if (lst != NULL)
13356 *lst = NULL;
13357 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013358 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013359
13360 size = xmlStrlen(string);
13361
13362 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013363 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013364 if (user_data != NULL)
13365 ctxt->userData = user_data;
13366 else
13367 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013368 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13369 ctxt->dict = oldctxt->dict;
Daniel Veillardad88b542017-12-08 09:42:31 +010013370 ctxt->input_id = oldctxt->input_id + 1;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013371 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13372 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13373 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013374
Daniel Veillard74eaec12009-08-26 15:57:20 +020013375#ifdef SAX2
13376 /* propagate namespaces down the entity */
13377 for (i = 0;i < oldctxt->nsNr;i += 2) {
13378 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13379 }
13380#endif
13381
Daniel Veillard328f48c2002-11-15 15:24:34 +000013382 oldsax = ctxt->sax;
13383 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013384 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013385 ctxt->replaceEntities = oldctxt->replaceEntities;
13386 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013387
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013388 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013389 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013390 newDoc = xmlNewDoc(BAD_CAST "1.0");
13391 if (newDoc == NULL) {
13392 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013393 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013394 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013395 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013396 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013397 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013398 newDoc->dict = ctxt->dict;
13399 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013400 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013401 } else {
13402 ctxt->myDoc = oldctxt->myDoc;
13403 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013404 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013405 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013406 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13407 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013408 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013409 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013410 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013411 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013412 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013413 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013414 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013415 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013416 ctxt->myDoc->children = NULL;
13417 ctxt->myDoc->last = NULL;
13418 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013419 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013420 ctxt->instate = XML_PARSER_CONTENT;
13421 ctxt->depth = oldctxt->depth + 1;
13422
Daniel Veillard328f48c2002-11-15 15:24:34 +000013423 ctxt->validate = 0;
13424 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013425 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13426 /*
13427 * ID/IDREF registration will be done in xmlValidateElement below
13428 */
13429 ctxt->loadsubset |= XML_SKIP_IDS;
13430 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013431 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013432 ctxt->attsDefault = oldctxt->attsDefault;
13433 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013434
Daniel Veillard68e9e742002-11-16 15:35:11 +000013435 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013436 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013437 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013438 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013439 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013440 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013441 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013442 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013443 }
13444
13445 if (!ctxt->wellFormed) {
13446 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013447 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013448 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013449 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013450 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013451 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013452 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013453
William M. Brack7b9154b2003-09-27 19:23:50 +000013454 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013455 xmlNodePtr cur;
13456
13457 /*
13458 * Return the newly created nodeset after unlinking it from
13459 * they pseudo parent.
13460 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013461 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013462 *lst = cur;
13463 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013464#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013465 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13466 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13467 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013468 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13469 oldctxt->myDoc, cur);
13470 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013471#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013472 cur->parent = NULL;
13473 cur = cur->next;
13474 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013475 ctxt->myDoc->children->children = NULL;
13476 }
13477 if (ctxt->myDoc != NULL) {
13478 xmlFreeNode(ctxt->myDoc->children);
13479 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013480 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013481 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013482
13483 /*
13484 * Record in the parent context the number of entities replacement
13485 * done when parsing that reference.
13486 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013487 if (oldctxt != NULL)
13488 oldctxt->nbentities += ctxt->nbentities;
13489
Daniel Veillard0161e632008-08-28 15:36:32 +000013490 /*
13491 * Also record the last error if any
13492 */
13493 if (ctxt->lastError.code != XML_ERR_OK)
13494 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13495
Daniel Veillard328f48c2002-11-15 15:24:34 +000013496 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013497 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013498 ctxt->attsDefault = NULL;
13499 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013500 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013501 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013502 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013503 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013504
Daniel Veillard328f48c2002-11-15 15:24:34 +000013505 return(ret);
13506}
13507
Daniel Veillard29b17482004-08-16 00:39:03 +000013508/**
13509 * xmlParseInNodeContext:
13510 * @node: the context node
13511 * @data: the input string
13512 * @datalen: the input string length in bytes
13513 * @options: a combination of xmlParserOption
13514 * @lst: the return value for the set of parsed nodes
13515 *
13516 * Parse a well-balanced chunk of an XML document
13517 * within the context (DTD, namespaces, etc ...) of the given node.
13518 *
13519 * The allowed sequence for the data is a Well Balanced Chunk defined by
13520 * the content production in the XML grammar:
13521 *
13522 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13523 *
13524 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13525 * error code otherwise
13526 */
13527xmlParserErrors
13528xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13529 int options, xmlNodePtr *lst) {
13530#ifdef SAX2
13531 xmlParserCtxtPtr ctxt;
13532 xmlDocPtr doc = NULL;
13533 xmlNodePtr fake, cur;
13534 int nsnr = 0;
13535
13536 xmlParserErrors ret = XML_ERR_OK;
13537
13538 /*
13539 * check all input parameters, grab the document
13540 */
13541 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13542 return(XML_ERR_INTERNAL_ERROR);
13543 switch (node->type) {
13544 case XML_ELEMENT_NODE:
13545 case XML_ATTRIBUTE_NODE:
13546 case XML_TEXT_NODE:
13547 case XML_CDATA_SECTION_NODE:
13548 case XML_ENTITY_REF_NODE:
13549 case XML_PI_NODE:
13550 case XML_COMMENT_NODE:
13551 case XML_DOCUMENT_NODE:
13552 case XML_HTML_DOCUMENT_NODE:
13553 break;
13554 default:
13555 return(XML_ERR_INTERNAL_ERROR);
13556
13557 }
13558 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13559 (node->type != XML_DOCUMENT_NODE) &&
13560 (node->type != XML_HTML_DOCUMENT_NODE))
13561 node = node->parent;
13562 if (node == NULL)
13563 return(XML_ERR_INTERNAL_ERROR);
13564 if (node->type == XML_ELEMENT_NODE)
13565 doc = node->doc;
13566 else
13567 doc = (xmlDocPtr) node;
13568 if (doc == NULL)
13569 return(XML_ERR_INTERNAL_ERROR);
13570
13571 /*
13572 * allocate a context and set-up everything not related to the
13573 * node position in the tree
13574 */
13575 if (doc->type == XML_DOCUMENT_NODE)
13576 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13577#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013578 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013579 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013580 /*
13581 * When parsing in context, it makes no sense to add implied
13582 * elements like html/body/etc...
13583 */
13584 options |= HTML_PARSE_NOIMPLIED;
13585 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013586#endif
13587 else
13588 return(XML_ERR_INTERNAL_ERROR);
13589
13590 if (ctxt == NULL)
13591 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013592
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013593 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013594 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13595 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13596 * we must wait until the last moment to free the original one.
13597 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013598 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013599 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013600 xmlDictFree(ctxt->dict);
13601 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013602 } else
13603 options |= XML_PARSE_NODICT;
13604
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013605 if (doc->encoding != NULL) {
13606 xmlCharEncodingHandlerPtr hdlr;
13607
13608 if (ctxt->encoding != NULL)
13609 xmlFree((xmlChar *) ctxt->encoding);
13610 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13611
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013612 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013613 if (hdlr != NULL) {
13614 xmlSwitchToEncoding(ctxt, hdlr);
13615 } else {
13616 return(XML_ERR_UNSUPPORTED_ENCODING);
13617 }
13618 }
13619
Daniel Veillard37334572008-07-31 08:20:02 +000013620 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013621 xmlDetectSAX2(ctxt);
13622 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013623 /* parsing in context, i.e. as within existing content */
Daniel Veillardad88b542017-12-08 09:42:31 +010013624 ctxt->input_id = 2;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013625 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013626
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013627 fake = xmlNewComment(NULL);
13628 if (fake == NULL) {
13629 xmlFreeParserCtxt(ctxt);
13630 return(XML_ERR_NO_MEMORY);
13631 }
13632 xmlAddChild(node, fake);
13633
Daniel Veillard29b17482004-08-16 00:39:03 +000013634 if (node->type == XML_ELEMENT_NODE) {
13635 nodePush(ctxt, node);
13636 /*
13637 * initialize the SAX2 namespaces stack
13638 */
13639 cur = node;
13640 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13641 xmlNsPtr ns = cur->nsDef;
13642 const xmlChar *iprefix, *ihref;
13643
13644 while (ns != NULL) {
13645 if (ctxt->dict) {
13646 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13647 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13648 } else {
13649 iprefix = ns->prefix;
13650 ihref = ns->href;
13651 }
13652
13653 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13654 nsPush(ctxt, iprefix, ihref);
13655 nsnr++;
13656 }
13657 ns = ns->next;
13658 }
13659 cur = cur->parent;
13660 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013661 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013662
13663 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13664 /*
13665 * ID/IDREF registration will be done in xmlValidateElement below
13666 */
13667 ctxt->loadsubset |= XML_SKIP_IDS;
13668 }
13669
Daniel Veillard499cc922006-01-18 17:22:35 +000013670#ifdef LIBXML_HTML_ENABLED
13671 if (doc->type == XML_HTML_DOCUMENT_NODE)
13672 __htmlParseContent(ctxt);
13673 else
13674#endif
13675 xmlParseContent(ctxt);
13676
Daniel Veillard29b17482004-08-16 00:39:03 +000013677 nsPop(ctxt, nsnr);
13678 if ((RAW == '<') && (NXT(1) == '/')) {
13679 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13680 } else if (RAW != 0) {
13681 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13682 }
13683 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13684 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13685 ctxt->wellFormed = 0;
13686 }
13687
13688 if (!ctxt->wellFormed) {
13689 if (ctxt->errNo == 0)
13690 ret = XML_ERR_INTERNAL_ERROR;
13691 else
13692 ret = (xmlParserErrors)ctxt->errNo;
13693 } else {
13694 ret = XML_ERR_OK;
13695 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013696
Daniel Veillard29b17482004-08-16 00:39:03 +000013697 /*
13698 * Return the newly created nodeset after unlinking it from
13699 * the pseudo sibling.
13700 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013701
Daniel Veillard29b17482004-08-16 00:39:03 +000013702 cur = fake->next;
13703 fake->next = NULL;
13704 node->last = fake;
13705
13706 if (cur != NULL) {
13707 cur->prev = NULL;
13708 }
13709
13710 *lst = cur;
13711
13712 while (cur != NULL) {
13713 cur->parent = NULL;
13714 cur = cur->next;
13715 }
13716
13717 xmlUnlinkNode(fake);
13718 xmlFreeNode(fake);
13719
13720
13721 if (ret != XML_ERR_OK) {
13722 xmlFreeNodeList(*lst);
13723 *lst = NULL;
13724 }
William M. Brackc3f81342004-10-03 01:22:44 +000013725
William M. Brackb7b54de2004-10-06 16:38:01 +000013726 if (doc->dict != NULL)
13727 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013728 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013729
Daniel Veillard29b17482004-08-16 00:39:03 +000013730 return(ret);
13731#else /* !SAX2 */
13732 return(XML_ERR_INTERNAL_ERROR);
13733#endif
13734}
13735
Daniel Veillard81273902003-09-30 00:43:48 +000013736#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013737/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013738 * xmlParseBalancedChunkMemoryRecover:
13739 * @doc: the document the chunk pertains to
13740 * @sax: the SAX handler bloc (possibly NULL)
13741 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13742 * @depth: Used for loop detection, use 0
13743 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13744 * @lst: the return value for the set of parsed nodes
13745 * @recover: return nodes even if the data is broken (use 0)
13746 *
13747 *
13748 * Parse a well-balanced chunk of an XML document
13749 * called by the parser
13750 * The allowed sequence for the Well Balanced Chunk is the one defined by
13751 * the content production in the XML grammar:
13752 *
13753 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13754 *
13755 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13756 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013757 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013758 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013759 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13760 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013761 */
13762int
13763xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013764 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013765 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013766 xmlParserCtxtPtr ctxt;
13767 xmlDocPtr newDoc;
13768 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013769 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013770 int size;
13771 int ret = 0;
13772
Daniel Veillard0161e632008-08-28 15:36:32 +000013773 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013774 return(XML_ERR_ENTITY_LOOP);
13775 }
13776
13777
Daniel Veillardcda96922001-08-21 10:56:31 +000013778 if (lst != NULL)
13779 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013780 if (string == NULL)
13781 return(-1);
13782
13783 size = xmlStrlen(string);
13784
13785 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13786 if (ctxt == NULL) return(-1);
13787 ctxt->userData = ctxt;
13788 if (sax != NULL) {
13789 oldsax = ctxt->sax;
13790 ctxt->sax = sax;
13791 if (user_data != NULL)
13792 ctxt->userData = user_data;
13793 }
13794 newDoc = xmlNewDoc(BAD_CAST "1.0");
13795 if (newDoc == NULL) {
13796 xmlFreeParserCtxt(ctxt);
13797 return(-1);
13798 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013799 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013800 if ((doc != NULL) && (doc->dict != NULL)) {
13801 xmlDictFree(ctxt->dict);
13802 ctxt->dict = doc->dict;
13803 xmlDictReference(ctxt->dict);
13804 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13805 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13806 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13807 ctxt->dictNames = 1;
13808 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013809 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013810 }
Owen Taylor3473f882001-02-23 17:55:21 +000013811 if (doc != NULL) {
13812 newDoc->intSubset = doc->intSubset;
13813 newDoc->extSubset = doc->extSubset;
13814 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013815 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13816 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013817 if (sax != NULL)
13818 ctxt->sax = oldsax;
13819 xmlFreeParserCtxt(ctxt);
13820 newDoc->intSubset = NULL;
13821 newDoc->extSubset = NULL;
13822 xmlFreeDoc(newDoc);
13823 return(-1);
13824 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013825 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13826 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013827 if (doc == NULL) {
13828 ctxt->myDoc = newDoc;
13829 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013830 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013831 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013832 /* Ensure that doc has XML spec namespace */
13833 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13834 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013835 }
13836 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardad88b542017-12-08 09:42:31 +010013837 ctxt->input_id = 2;
Owen Taylor3473f882001-02-23 17:55:21 +000013838 ctxt->depth = depth;
13839
13840 /*
13841 * Doing validity checking on chunk doesn't make sense
13842 */
13843 ctxt->validate = 0;
13844 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013845 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013846
Daniel Veillardb39bc392002-10-26 19:29:51 +000013847 if ( doc != NULL ){
13848 content = doc->children;
13849 doc->children = NULL;
13850 xmlParseContent(ctxt);
13851 doc->children = content;
13852 }
13853 else {
13854 xmlParseContent(ctxt);
13855 }
Owen Taylor3473f882001-02-23 17:55:21 +000013856 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013857 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013858 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013859 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013860 }
13861 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013862 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013863 }
13864
13865 if (!ctxt->wellFormed) {
13866 if (ctxt->errNo == 0)
13867 ret = 1;
13868 else
13869 ret = ctxt->errNo;
13870 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013871 ret = 0;
13872 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013873
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013874 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13875 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013876
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013877 /*
13878 * Return the newly created nodeset after unlinking it from
13879 * they pseudo parent.
13880 */
13881 cur = newDoc->children->children;
13882 *lst = cur;
13883 while (cur != NULL) {
13884 xmlSetTreeDoc(cur, doc);
13885 cur->parent = NULL;
13886 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013887 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013888 newDoc->children->children = NULL;
13889 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013890
13891 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013892 ctxt->sax = oldsax;
13893 xmlFreeParserCtxt(ctxt);
13894 newDoc->intSubset = NULL;
13895 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013896 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013897 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013898
Owen Taylor3473f882001-02-23 17:55:21 +000013899 return(ret);
13900}
13901
13902/**
13903 * xmlSAXParseEntity:
13904 * @sax: the SAX handler block
13905 * @filename: the filename
13906 *
13907 * parse an XML external entity out of context and build a tree.
13908 * It use the given SAX function block to handle the parsing callback.
13909 * If sax is NULL, fallback to the default DOM tree building routines.
13910 *
13911 * [78] extParsedEnt ::= TextDecl? content
13912 *
13913 * This correspond to a "Well Balanced" chunk
13914 *
13915 * Returns the resulting document tree
13916 */
13917
13918xmlDocPtr
13919xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13920 xmlDocPtr ret;
13921 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013922
13923 ctxt = xmlCreateFileParserCtxt(filename);
13924 if (ctxt == NULL) {
13925 return(NULL);
13926 }
13927 if (sax != NULL) {
13928 if (ctxt->sax != NULL)
13929 xmlFree(ctxt->sax);
13930 ctxt->sax = sax;
13931 ctxt->userData = NULL;
13932 }
13933
Owen Taylor3473f882001-02-23 17:55:21 +000013934 xmlParseExtParsedEnt(ctxt);
13935
13936 if (ctxt->wellFormed)
13937 ret = ctxt->myDoc;
13938 else {
13939 ret = NULL;
13940 xmlFreeDoc(ctxt->myDoc);
13941 ctxt->myDoc = NULL;
13942 }
13943 if (sax != NULL)
13944 ctxt->sax = NULL;
13945 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013946
Owen Taylor3473f882001-02-23 17:55:21 +000013947 return(ret);
13948}
13949
13950/**
13951 * xmlParseEntity:
13952 * @filename: the filename
13953 *
13954 * parse an XML external entity out of context and build a tree.
13955 *
13956 * [78] extParsedEnt ::= TextDecl? content
13957 *
13958 * This correspond to a "Well Balanced" chunk
13959 *
13960 * Returns the resulting document tree
13961 */
13962
13963xmlDocPtr
13964xmlParseEntity(const char *filename) {
13965 return(xmlSAXParseEntity(NULL, filename));
13966}
Daniel Veillard81273902003-09-30 00:43:48 +000013967#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013968
13969/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013970 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013971 * @URL: the entity URL
13972 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013973 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013974 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013975 *
13976 * Create a parser context for an external entity
13977 * Automatic support for ZLIB/Compress compressed document is provided
13978 * by default if found at compile-time.
13979 *
13980 * Returns the new parser context or NULL
13981 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013982static xmlParserCtxtPtr
13983xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13984 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013985 xmlParserCtxtPtr ctxt;
13986 xmlParserInputPtr inputStream;
13987 char *directory = NULL;
13988 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013989
Owen Taylor3473f882001-02-23 17:55:21 +000013990 ctxt = xmlNewParserCtxt();
13991 if (ctxt == NULL) {
13992 return(NULL);
13993 }
13994
Daniel Veillard48247b42009-07-10 16:12:46 +020013995 if (pctx != NULL) {
13996 ctxt->options = pctx->options;
13997 ctxt->_private = pctx->_private;
Daniel Veillardad88b542017-12-08 09:42:31 +010013998 /*
13999 * this is a subparser of pctx, so the input_id should be
14000 * incremented to distinguish from main entity
14001 */
14002 ctxt->input_id = pctx->input_id + 1;
Rob Richards9c0aa472009-03-26 18:10:19 +000014003 }
14004
Owen Taylor3473f882001-02-23 17:55:21 +000014005 uri = xmlBuildURI(URL, base);
14006
14007 if (uri == NULL) {
14008 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14009 if (inputStream == NULL) {
14010 xmlFreeParserCtxt(ctxt);
14011 return(NULL);
14012 }
14013
14014 inputPush(ctxt, inputStream);
14015
14016 if ((ctxt->directory == NULL) && (directory == NULL))
14017 directory = xmlParserGetDirectory((char *)URL);
14018 if ((ctxt->directory == NULL) && (directory != NULL))
14019 ctxt->directory = directory;
14020 } else {
14021 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14022 if (inputStream == NULL) {
14023 xmlFree(uri);
14024 xmlFreeParserCtxt(ctxt);
14025 return(NULL);
14026 }
14027
14028 inputPush(ctxt, inputStream);
14029
14030 if ((ctxt->directory == NULL) && (directory == NULL))
14031 directory = xmlParserGetDirectory((char *)uri);
14032 if ((ctxt->directory == NULL) && (directory != NULL))
14033 ctxt->directory = directory;
14034 xmlFree(uri);
14035 }
Owen Taylor3473f882001-02-23 17:55:21 +000014036 return(ctxt);
14037}
14038
Rob Richards9c0aa472009-03-26 18:10:19 +000014039/**
14040 * xmlCreateEntityParserCtxt:
14041 * @URL: the entity URL
14042 * @ID: the entity PUBLIC ID
14043 * @base: a possible base for the target URI
14044 *
14045 * Create a parser context for an external entity
14046 * Automatic support for ZLIB/Compress compressed document is provided
14047 * by default if found at compile-time.
14048 *
14049 * Returns the new parser context or NULL
14050 */
14051xmlParserCtxtPtr
14052xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14053 const xmlChar *base) {
14054 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14055
14056}
14057
Owen Taylor3473f882001-02-23 17:55:21 +000014058/************************************************************************
14059 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014060 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014061 * *
14062 ************************************************************************/
14063
14064/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014065 * xmlCreateURLParserCtxt:
14066 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014067 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014068 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014069 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014070 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014071 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014072 *
14073 * Returns the new parser context or NULL
14074 */
14075xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014076xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014077{
14078 xmlParserCtxtPtr ctxt;
14079 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014080 char *directory = NULL;
14081
Owen Taylor3473f882001-02-23 17:55:21 +000014082 ctxt = xmlNewParserCtxt();
14083 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014084 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014085 return(NULL);
14086 }
14087
Daniel Veillarddf292f72005-01-16 19:00:15 +000014088 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014089 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014090 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014091
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014092 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014093 if (inputStream == NULL) {
14094 xmlFreeParserCtxt(ctxt);
14095 return(NULL);
14096 }
14097
Owen Taylor3473f882001-02-23 17:55:21 +000014098 inputPush(ctxt, inputStream);
14099 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014100 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014101 if ((ctxt->directory == NULL) && (directory != NULL))
14102 ctxt->directory = directory;
14103
14104 return(ctxt);
14105}
14106
Daniel Veillard61b93382003-11-03 14:28:31 +000014107/**
14108 * xmlCreateFileParserCtxt:
14109 * @filename: the filename
14110 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014111 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014112 * Automatic support for ZLIB/Compress compressed document is provided
14113 * by default if found at compile-time.
14114 *
14115 * Returns the new parser context or NULL
14116 */
14117xmlParserCtxtPtr
14118xmlCreateFileParserCtxt(const char *filename)
14119{
14120 return(xmlCreateURLParserCtxt(filename, 0));
14121}
14122
Daniel Veillard81273902003-09-30 00:43:48 +000014123#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014124/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014125 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014126 * @sax: the SAX handler block
14127 * @filename: the filename
14128 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14129 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014130 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014131 *
14132 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14133 * compressed document is provided by default if found at compile-time.
14134 * It use the given SAX function block to handle the parsing callback.
14135 * If sax is NULL, fallback to the default DOM tree building routines.
14136 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014137 * User data (void *) is stored within the parser context in the
14138 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014139 *
Owen Taylor3473f882001-02-23 17:55:21 +000014140 * Returns the resulting document tree
14141 */
14142
14143xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014144xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14145 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014146 xmlDocPtr ret;
14147 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014148
Daniel Veillard635ef722001-10-29 11:48:19 +000014149 xmlInitParser();
14150
Owen Taylor3473f882001-02-23 17:55:21 +000014151 ctxt = xmlCreateFileParserCtxt(filename);
14152 if (ctxt == NULL) {
14153 return(NULL);
14154 }
14155 if (sax != NULL) {
14156 if (ctxt->sax != NULL)
14157 xmlFree(ctxt->sax);
14158 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014159 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014160 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014161 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014162 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014163 }
Owen Taylor3473f882001-02-23 17:55:21 +000014164
Daniel Veillard37d2d162008-03-14 10:54:00 +000014165 if (ctxt->directory == NULL)
14166 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014167
Daniel Veillarddad3f682002-11-17 16:47:27 +000014168 ctxt->recovery = recovery;
14169
Owen Taylor3473f882001-02-23 17:55:21 +000014170 xmlParseDocument(ctxt);
14171
William M. Brackc07329e2003-09-08 01:57:30 +000014172 if ((ctxt->wellFormed) || recovery) {
14173 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014174 if (ret != NULL) {
14175 if (ctxt->input->buf->compressed > 0)
14176 ret->compression = 9;
14177 else
14178 ret->compression = ctxt->input->buf->compressed;
14179 }
William M. Brackc07329e2003-09-08 01:57:30 +000014180 }
Owen Taylor3473f882001-02-23 17:55:21 +000014181 else {
14182 ret = NULL;
14183 xmlFreeDoc(ctxt->myDoc);
14184 ctxt->myDoc = NULL;
14185 }
14186 if (sax != NULL)
14187 ctxt->sax = NULL;
14188 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014189
Owen Taylor3473f882001-02-23 17:55:21 +000014190 return(ret);
14191}
14192
14193/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014194 * xmlSAXParseFile:
14195 * @sax: the SAX handler block
14196 * @filename: the filename
14197 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14198 * documents
14199 *
14200 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14201 * compressed document is provided by default if found at compile-time.
14202 * It use the given SAX function block to handle the parsing callback.
14203 * If sax is NULL, fallback to the default DOM tree building routines.
14204 *
14205 * Returns the resulting document tree
14206 */
14207
14208xmlDocPtr
14209xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14210 int recovery) {
14211 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14212}
14213
14214/**
Owen Taylor3473f882001-02-23 17:55:21 +000014215 * xmlRecoverDoc:
14216 * @cur: a pointer to an array of xmlChar
14217 *
14218 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014219 * In the case the document is not Well Formed, a attempt to build a
14220 * tree is tried anyway
14221 *
14222 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014223 */
14224
14225xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014226xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014227 return(xmlSAXParseDoc(NULL, cur, 1));
14228}
14229
14230/**
14231 * xmlParseFile:
14232 * @filename: the filename
14233 *
14234 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14235 * compressed document is provided by default if found at compile-time.
14236 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014237 * Returns the resulting document tree if the file was wellformed,
14238 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014239 */
14240
14241xmlDocPtr
14242xmlParseFile(const char *filename) {
14243 return(xmlSAXParseFile(NULL, filename, 0));
14244}
14245
14246/**
14247 * xmlRecoverFile:
14248 * @filename: the filename
14249 *
14250 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14251 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014252 * In the case the document is not Well Formed, it attempts to build
14253 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014254 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014255 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014256 */
14257
14258xmlDocPtr
14259xmlRecoverFile(const char *filename) {
14260 return(xmlSAXParseFile(NULL, filename, 1));
14261}
14262
14263
14264/**
14265 * xmlSetupParserForBuffer:
14266 * @ctxt: an XML parser context
14267 * @buffer: a xmlChar * buffer
14268 * @filename: a file name
14269 *
14270 * Setup the parser context to parse a new buffer; Clears any prior
14271 * contents from the parser context. The buffer parameter must not be
14272 * NULL, but the filename parameter can be
14273 */
14274void
14275xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14276 const char* filename)
14277{
14278 xmlParserInputPtr input;
14279
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014280 if ((ctxt == NULL) || (buffer == NULL))
14281 return;
14282
Owen Taylor3473f882001-02-23 17:55:21 +000014283 input = xmlNewInputStream(ctxt);
14284 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014285 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014286 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014287 return;
14288 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014289
Owen Taylor3473f882001-02-23 17:55:21 +000014290 xmlClearParserCtxt(ctxt);
14291 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014292 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014293 input->base = buffer;
14294 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014295 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014296 inputPush(ctxt, input);
14297}
14298
14299/**
14300 * xmlSAXUserParseFile:
14301 * @sax: a SAX handler
14302 * @user_data: The user data returned on SAX callbacks
14303 * @filename: a file name
14304 *
14305 * parse an XML file and call the given SAX handler routines.
14306 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014307 *
Owen Taylor3473f882001-02-23 17:55:21 +000014308 * Returns 0 in case of success or a error number otherwise
14309 */
14310int
14311xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14312 const char *filename) {
14313 int ret = 0;
14314 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014315
Owen Taylor3473f882001-02-23 17:55:21 +000014316 ctxt = xmlCreateFileParserCtxt(filename);
14317 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014318 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014319 xmlFree(ctxt->sax);
14320 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014321 xmlDetectSAX2(ctxt);
14322
Owen Taylor3473f882001-02-23 17:55:21 +000014323 if (user_data != NULL)
14324 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014325
Owen Taylor3473f882001-02-23 17:55:21 +000014326 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014327
Owen Taylor3473f882001-02-23 17:55:21 +000014328 if (ctxt->wellFormed)
14329 ret = 0;
14330 else {
14331 if (ctxt->errNo != 0)
14332 ret = ctxt->errNo;
14333 else
14334 ret = -1;
14335 }
14336 if (sax != NULL)
14337 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014338 if (ctxt->myDoc != NULL) {
14339 xmlFreeDoc(ctxt->myDoc);
14340 ctxt->myDoc = NULL;
14341 }
Owen Taylor3473f882001-02-23 17:55:21 +000014342 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014343
Owen Taylor3473f882001-02-23 17:55:21 +000014344 return ret;
14345}
Daniel Veillard81273902003-09-30 00:43:48 +000014346#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014347
14348/************************************************************************
14349 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014350 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014351 * *
14352 ************************************************************************/
14353
14354/**
14355 * xmlCreateMemoryParserCtxt:
14356 * @buffer: a pointer to a char array
14357 * @size: the size of the array
14358 *
14359 * Create a parser context for an XML in-memory document.
14360 *
14361 * Returns the new parser context or NULL
14362 */
14363xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014364xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014365 xmlParserCtxtPtr ctxt;
14366 xmlParserInputPtr input;
14367 xmlParserInputBufferPtr buf;
14368
14369 if (buffer == NULL)
14370 return(NULL);
14371 if (size <= 0)
14372 return(NULL);
14373
14374 ctxt = xmlNewParserCtxt();
14375 if (ctxt == NULL)
14376 return(NULL);
14377
Daniel Veillard53350552003-09-18 13:35:51 +000014378 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014379 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014380 if (buf == NULL) {
14381 xmlFreeParserCtxt(ctxt);
14382 return(NULL);
14383 }
Owen Taylor3473f882001-02-23 17:55:21 +000014384
14385 input = xmlNewInputStream(ctxt);
14386 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014387 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014388 xmlFreeParserCtxt(ctxt);
14389 return(NULL);
14390 }
14391
14392 input->filename = NULL;
14393 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014394 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014395
14396 inputPush(ctxt, input);
14397 return(ctxt);
14398}
14399
Daniel Veillard81273902003-09-30 00:43:48 +000014400#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014401/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014402 * xmlSAXParseMemoryWithData:
14403 * @sax: the SAX handler block
14404 * @buffer: an pointer to a char array
14405 * @size: the size of the array
14406 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14407 * documents
14408 * @data: the userdata
14409 *
14410 * parse an XML in-memory block and use the given SAX function block
14411 * to handle the parsing callback. If sax is NULL, fallback to the default
14412 * DOM tree building routines.
14413 *
14414 * User data (void *) is stored within the parser context in the
14415 * context's _private member, so it is available nearly everywhere in libxml
14416 *
14417 * Returns the resulting document tree
14418 */
14419
14420xmlDocPtr
14421xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14422 int size, int recovery, void *data) {
14423 xmlDocPtr ret;
14424 xmlParserCtxtPtr ctxt;
14425
Daniel Veillardab2a7632009-07-09 08:45:03 +020014426 xmlInitParser();
14427
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014428 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14429 if (ctxt == NULL) return(NULL);
14430 if (sax != NULL) {
14431 if (ctxt->sax != NULL)
14432 xmlFree(ctxt->sax);
14433 ctxt->sax = sax;
14434 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014435 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014436 if (data!=NULL) {
14437 ctxt->_private=data;
14438 }
14439
Daniel Veillardadba5f12003-04-04 16:09:01 +000014440 ctxt->recovery = recovery;
14441
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014442 xmlParseDocument(ctxt);
14443
14444 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14445 else {
14446 ret = NULL;
14447 xmlFreeDoc(ctxt->myDoc);
14448 ctxt->myDoc = NULL;
14449 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014450 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014451 ctxt->sax = NULL;
14452 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014453
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014454 return(ret);
14455}
14456
14457/**
Owen Taylor3473f882001-02-23 17:55:21 +000014458 * xmlSAXParseMemory:
14459 * @sax: the SAX handler block
14460 * @buffer: an pointer to a char array
14461 * @size: the size of the array
14462 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14463 * documents
14464 *
14465 * parse an XML in-memory block and use the given SAX function block
14466 * to handle the parsing callback. If sax is NULL, fallback to the default
14467 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014468 *
Owen Taylor3473f882001-02-23 17:55:21 +000014469 * Returns the resulting document tree
14470 */
14471xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014472xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14473 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014474 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014475}
14476
14477/**
14478 * xmlParseMemory:
14479 * @buffer: an pointer to a char array
14480 * @size: the size of the array
14481 *
14482 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014483 *
Owen Taylor3473f882001-02-23 17:55:21 +000014484 * Returns the resulting document tree
14485 */
14486
Daniel Veillard50822cb2001-07-26 20:05:51 +000014487xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014488 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14489}
14490
14491/**
14492 * xmlRecoverMemory:
14493 * @buffer: an pointer to a char array
14494 * @size: the size of the array
14495 *
14496 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014497 * In the case the document is not Well Formed, an attempt to
14498 * build a tree is tried anyway
14499 *
14500 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014501 */
14502
Daniel Veillard50822cb2001-07-26 20:05:51 +000014503xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014504 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14505}
14506
14507/**
14508 * xmlSAXUserParseMemory:
14509 * @sax: a SAX handler
14510 * @user_data: The user data returned on SAX callbacks
14511 * @buffer: an in-memory XML document input
14512 * @size: the length of the XML document in bytes
14513 *
14514 * A better SAX parsing routine.
14515 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014516 *
Owen Taylor3473f882001-02-23 17:55:21 +000014517 * Returns 0 in case of success or a error number otherwise
14518 */
14519int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014520 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014521 int ret = 0;
14522 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014523
14524 xmlInitParser();
14525
Owen Taylor3473f882001-02-23 17:55:21 +000014526 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14527 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014528 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14529 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014530 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014531 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014532
Daniel Veillard30211a02001-04-26 09:33:18 +000014533 if (user_data != NULL)
14534 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014535
Owen Taylor3473f882001-02-23 17:55:21 +000014536 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014537
Owen Taylor3473f882001-02-23 17:55:21 +000014538 if (ctxt->wellFormed)
14539 ret = 0;
14540 else {
14541 if (ctxt->errNo != 0)
14542 ret = ctxt->errNo;
14543 else
14544 ret = -1;
14545 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014546 if (sax != NULL)
14547 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014548 if (ctxt->myDoc != NULL) {
14549 xmlFreeDoc(ctxt->myDoc);
14550 ctxt->myDoc = NULL;
14551 }
Owen Taylor3473f882001-02-23 17:55:21 +000014552 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014553
Owen Taylor3473f882001-02-23 17:55:21 +000014554 return ret;
14555}
Daniel Veillard81273902003-09-30 00:43:48 +000014556#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014557
14558/**
14559 * xmlCreateDocParserCtxt:
14560 * @cur: a pointer to an array of xmlChar
14561 *
14562 * Creates a parser context for an XML in-memory document.
14563 *
14564 * Returns the new parser context or NULL
14565 */
14566xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014567xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014568 int len;
14569
14570 if (cur == NULL)
14571 return(NULL);
14572 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014573 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014574}
14575
Daniel Veillard81273902003-09-30 00:43:48 +000014576#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014577/**
14578 * xmlSAXParseDoc:
14579 * @sax: the SAX handler block
14580 * @cur: a pointer to an array of xmlChar
14581 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14582 * documents
14583 *
14584 * parse an XML in-memory document and build a tree.
14585 * It use the given SAX function block to handle the parsing callback.
14586 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014587 *
Owen Taylor3473f882001-02-23 17:55:21 +000014588 * Returns the resulting document tree
14589 */
14590
14591xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014592xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014593 xmlDocPtr ret;
14594 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014595 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014596
Daniel Veillard38936062004-11-04 17:45:11 +000014597 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014598
14599
14600 ctxt = xmlCreateDocParserCtxt(cur);
14601 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014602 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014603 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014604 ctxt->sax = sax;
14605 ctxt->userData = NULL;
14606 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014607 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014608
14609 xmlParseDocument(ctxt);
14610 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14611 else {
14612 ret = NULL;
14613 xmlFreeDoc(ctxt->myDoc);
14614 ctxt->myDoc = NULL;
14615 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014616 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014617 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014618 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014619
Owen Taylor3473f882001-02-23 17:55:21 +000014620 return(ret);
14621}
14622
14623/**
14624 * xmlParseDoc:
14625 * @cur: a pointer to an array of xmlChar
14626 *
14627 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014628 *
Owen Taylor3473f882001-02-23 17:55:21 +000014629 * Returns the resulting document tree
14630 */
14631
14632xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014633xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014634 return(xmlSAXParseDoc(NULL, cur, 0));
14635}
Daniel Veillard81273902003-09-30 00:43:48 +000014636#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014637
Daniel Veillard81273902003-09-30 00:43:48 +000014638#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014639/************************************************************************
14640 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014641 * Specific function to keep track of entities references *
14642 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014643 * *
14644 ************************************************************************/
14645
14646static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14647
14648/**
14649 * xmlAddEntityReference:
14650 * @ent : A valid entity
14651 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014652 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014653 *
14654 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14655 */
14656static void
14657xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14658 xmlNodePtr lastNode)
14659{
14660 if (xmlEntityRefFunc != NULL) {
14661 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14662 }
14663}
14664
14665
14666/**
14667 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014668 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014669 *
14670 * Set the function to call call back when a xml reference has been made
14671 */
14672void
14673xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14674{
14675 xmlEntityRefFunc = func;
14676}
Daniel Veillard81273902003-09-30 00:43:48 +000014677#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014678
14679/************************************************************************
14680 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014681 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014682 * *
14683 ************************************************************************/
14684
14685#ifdef LIBXML_XPATH_ENABLED
14686#include <libxml/xpath.h>
14687#endif
14688
Daniel Veillardffa3c742005-07-21 13:24:09 +000014689extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014690static int xmlParserInitialized = 0;
14691
14692/**
14693 * xmlInitParser:
14694 *
14695 * Initialization function for the XML parser.
14696 * This is not reentrant. Call once before processing in case of
14697 * use in multithreaded programs.
14698 */
14699
14700void
14701xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014702 if (xmlParserInitialized != 0)
14703 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014704
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014705#ifdef LIBXML_THREAD_ENABLED
14706 __xmlGlobalInitMutexLock();
14707 if (xmlParserInitialized == 0) {
14708#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014709 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014710 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014711 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14712 (xmlGenericError == NULL))
14713 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014714 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014715 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014716 xmlInitCharEncodingHandlers();
14717 xmlDefaultSAXHandlerInit();
14718 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014719#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014720 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014721#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014722#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014723 htmlInitAutoClose();
14724 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014725#endif
14726#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014727 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014728#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014729 xmlParserInitialized = 1;
14730#ifdef LIBXML_THREAD_ENABLED
14731 }
14732 __xmlGlobalInitMutexUnlock();
14733#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014734}
14735
14736/**
14737 * xmlCleanupParser:
14738 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014739 * This function name is somewhat misleading. It does not clean up
14740 * parser state, it cleans up memory allocated by the library itself.
14741 * It is a cleanup function for the XML library. It tries to reclaim all
14742 * related global memory allocated for the library processing.
14743 * It doesn't deallocate any document related memory. One should
14744 * call xmlCleanupParser() only when the process has finished using
14745 * the library and all XML/HTML documents built with it.
14746 * See also xmlInitParser() which has the opposite function of preparing
14747 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014748 *
14749 * WARNING: if your application is multithreaded or has plugin support
14750 * calling this may crash the application if another thread or
14751 * a plugin is still using libxml2. It's sometimes very hard to
14752 * guess if libxml2 is in use in the application, some libraries
14753 * or plugins may use it without notice. In case of doubt abstain
14754 * from calling this function or do it just before calling exit()
14755 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014756 */
14757
14758void
14759xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014760 if (!xmlParserInitialized)
14761 return;
14762
Owen Taylor3473f882001-02-23 17:55:21 +000014763 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014764#ifdef LIBXML_CATALOG_ENABLED
14765 xmlCatalogCleanup();
14766#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014767 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014768 xmlCleanupInputCallbacks();
14769#ifdef LIBXML_OUTPUT_ENABLED
14770 xmlCleanupOutputCallbacks();
14771#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014772#ifdef LIBXML_SCHEMAS_ENABLED
14773 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014774 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014775#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014776 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014777 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014778 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014779 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014780 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014781}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014782
14783/************************************************************************
14784 * *
14785 * New set (2.6.0) of simpler and more flexible APIs *
14786 * *
14787 ************************************************************************/
14788
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * The macro expands to a single statement (do { } while (0)) so that it
 * can be used safely as the body of an if/else; the caller supplies the
 * terminating semicolon. Note that @str is evaluated more than once, so
 * it must not have side effects.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14800
14801/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014802 * xmlCtxtReset:
14803 * @ctxt: an XML parser context
14804 *
14805 * Reset a parser context
14806 */
14807void
14808xmlCtxtReset(xmlParserCtxtPtr ctxt)
14809{
14810 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014811 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014812
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014813 if (ctxt == NULL)
14814 return;
14815
14816 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014817
14818 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14819 xmlFreeInputStream(input);
14820 }
14821 ctxt->inputNr = 0;
14822 ctxt->input = NULL;
14823
14824 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014825 if (ctxt->spaceTab != NULL) {
14826 ctxt->spaceTab[0] = -1;
14827 ctxt->space = &ctxt->spaceTab[0];
14828 } else {
14829 ctxt->space = NULL;
14830 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014831
14832
14833 ctxt->nodeNr = 0;
14834 ctxt->node = NULL;
14835
14836 ctxt->nameNr = 0;
14837 ctxt->name = NULL;
14838
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014839 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014840 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014841 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014842 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014843 DICT_FREE(ctxt->directory);
14844 ctxt->directory = NULL;
14845 DICT_FREE(ctxt->extSubURI);
14846 ctxt->extSubURI = NULL;
14847 DICT_FREE(ctxt->extSubSystem);
14848 ctxt->extSubSystem = NULL;
14849 if (ctxt->myDoc != NULL)
14850 xmlFreeDoc(ctxt->myDoc);
14851 ctxt->myDoc = NULL;
14852
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014853 ctxt->standalone = -1;
14854 ctxt->hasExternalSubset = 0;
14855 ctxt->hasPErefs = 0;
14856 ctxt->html = 0;
14857 ctxt->external = 0;
14858 ctxt->instate = XML_PARSER_START;
14859 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014860
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014861 ctxt->wellFormed = 1;
14862 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014863 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014864 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014865#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014866 ctxt->vctxt.userData = ctxt;
14867 ctxt->vctxt.error = xmlParserValidityError;
14868 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014869#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014870 ctxt->record_info = 0;
14871 ctxt->nbChars = 0;
14872 ctxt->checkIndex = 0;
14873 ctxt->inSubset = 0;
14874 ctxt->errNo = XML_ERR_OK;
14875 ctxt->depth = 0;
14876 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14877 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014878 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014879 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014880 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014881 xmlInitNodeInfoSeq(&ctxt->node_seq);
14882
14883 if (ctxt->attsDefault != NULL) {
Nick Wellnhofere03f0a12017-11-09 16:42:47 +010014884 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014885 ctxt->attsDefault = NULL;
14886 }
14887 if (ctxt->attsSpecial != NULL) {
14888 xmlHashFree(ctxt->attsSpecial, NULL);
14889 ctxt->attsSpecial = NULL;
14890 }
14891
Daniel Veillard4432df22003-09-28 18:58:27 +000014892#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014893 if (ctxt->catalogs != NULL)
14894 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014895#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014896 if (ctxt->lastError.code != XML_ERR_OK)
14897 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014898}
14899
14900/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014901 * xmlCtxtResetPush:
14902 * @ctxt: an XML parser context
14903 * @chunk: a pointer to an array of chars
14904 * @size: number of chars in the array
14905 * @filename: an optional file name or URI
14906 * @encoding: the document encoding, or NULL
14907 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014908 * Reset a push parser context
14909 *
14910 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014911 */
14912int
14913xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14914 int size, const char *filename, const char *encoding)
14915{
14916 xmlParserInputPtr inputStream;
14917 xmlParserInputBufferPtr buf;
14918 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14919
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014920 if (ctxt == NULL)
14921 return(1);
14922
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014923 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14924 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14925
14926 buf = xmlAllocParserInputBuffer(enc);
14927 if (buf == NULL)
14928 return(1);
14929
14930 if (ctxt == NULL) {
14931 xmlFreeParserInputBuffer(buf);
14932 return(1);
14933 }
14934
14935 xmlCtxtReset(ctxt);
14936
14937 if (ctxt->pushTab == NULL) {
14938 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14939 sizeof(xmlChar *));
14940 if (ctxt->pushTab == NULL) {
14941 xmlErrMemory(ctxt, NULL);
14942 xmlFreeParserInputBuffer(buf);
14943 return(1);
14944 }
14945 }
14946
14947 if (filename == NULL) {
14948 ctxt->directory = NULL;
14949 } else {
14950 ctxt->directory = xmlParserGetDirectory(filename);
14951 }
14952
14953 inputStream = xmlNewInputStream(ctxt);
14954 if (inputStream == NULL) {
14955 xmlFreeParserInputBuffer(buf);
14956 return(1);
14957 }
14958
14959 if (filename == NULL)
14960 inputStream->filename = NULL;
14961 else
14962 inputStream->filename = (char *)
14963 xmlCanonicPath((const xmlChar *) filename);
14964 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014965 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014966
14967 inputPush(ctxt, inputStream);
14968
14969 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14970 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014971 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14972 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014973
14974 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14975
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014976 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014977#ifdef DEBUG_PUSH
14978 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14979#endif
14980 }
14981
14982 if (encoding != NULL) {
14983 xmlCharEncodingHandlerPtr hdlr;
14984
Daniel Veillard37334572008-07-31 08:20:02 +000014985 if (ctxt->encoding != NULL)
14986 xmlFree((xmlChar *) ctxt->encoding);
14987 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14988
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014989 hdlr = xmlFindCharEncodingHandler(encoding);
14990 if (hdlr != NULL) {
14991 xmlSwitchToEncoding(ctxt, hdlr);
14992 } else {
14993 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14994 "Unsupported encoding %s\n", BAD_CAST encoding);
14995 }
14996 } else if (enc != XML_CHAR_ENCODING_NONE) {
14997 xmlSwitchEncoding(ctxt, enc);
14998 }
14999
15000 return(0);
15001}
15002
Daniel Veillard37334572008-07-31 08:20:02 +000015003
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015004/**
Daniel Veillard37334572008-07-31 08:20:02 +000015005 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015006 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015007 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015008 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015009 *
15010 * Applies the options to the parser context
15011 *
15012 * Returns 0 in case of success, the set of unknown or unimplemented options
15013 * in case of error.
15014 */
Daniel Veillard37334572008-07-31 08:20:02 +000015015static int
15016xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015017{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015018 if (ctxt == NULL)
15019 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015020 if (encoding != NULL) {
15021 if (ctxt->encoding != NULL)
15022 xmlFree((xmlChar *) ctxt->encoding);
15023 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15024 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015025 if (options & XML_PARSE_RECOVER) {
15026 ctxt->recovery = 1;
15027 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015028 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015029 } else
15030 ctxt->recovery = 0;
15031 if (options & XML_PARSE_DTDLOAD) {
15032 ctxt->loadsubset = XML_DETECT_IDS;
15033 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015034 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015035 } else
15036 ctxt->loadsubset = 0;
15037 if (options & XML_PARSE_DTDATTR) {
15038 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15039 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015040 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015041 }
15042 if (options & XML_PARSE_NOENT) {
15043 ctxt->replaceEntities = 1;
15044 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15045 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015046 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015047 } else
15048 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015049 if (options & XML_PARSE_PEDANTIC) {
15050 ctxt->pedantic = 1;
15051 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015052 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015053 } else
15054 ctxt->pedantic = 0;
15055 if (options & XML_PARSE_NOBLANKS) {
15056 ctxt->keepBlanks = 0;
15057 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15058 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015059 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015060 } else
15061 ctxt->keepBlanks = 1;
15062 if (options & XML_PARSE_DTDVALID) {
15063 ctxt->validate = 1;
15064 if (options & XML_PARSE_NOWARNING)
15065 ctxt->vctxt.warning = NULL;
15066 if (options & XML_PARSE_NOERROR)
15067 ctxt->vctxt.error = NULL;
15068 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015069 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015070 } else
15071 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015072 if (options & XML_PARSE_NOWARNING) {
15073 ctxt->sax->warning = NULL;
15074 options -= XML_PARSE_NOWARNING;
15075 }
15076 if (options & XML_PARSE_NOERROR) {
15077 ctxt->sax->error = NULL;
15078 ctxt->sax->fatalError = NULL;
15079 options -= XML_PARSE_NOERROR;
15080 }
Daniel Veillard81273902003-09-30 00:43:48 +000015081#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015082 if (options & XML_PARSE_SAX1) {
15083 ctxt->sax->startElement = xmlSAX2StartElement;
15084 ctxt->sax->endElement = xmlSAX2EndElement;
15085 ctxt->sax->startElementNs = NULL;
15086 ctxt->sax->endElementNs = NULL;
15087 ctxt->sax->initialized = 1;
15088 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015089 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015090 }
Daniel Veillard81273902003-09-30 00:43:48 +000015091#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015092 if (options & XML_PARSE_NODICT) {
15093 ctxt->dictNames = 0;
15094 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015095 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015096 } else {
15097 ctxt->dictNames = 1;
15098 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015099 if (options & XML_PARSE_NOCDATA) {
15100 ctxt->sax->cdataBlock = NULL;
15101 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015102 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015103 }
15104 if (options & XML_PARSE_NSCLEAN) {
15105 ctxt->options |= XML_PARSE_NSCLEAN;
15106 options -= XML_PARSE_NSCLEAN;
15107 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015108 if (options & XML_PARSE_NONET) {
15109 ctxt->options |= XML_PARSE_NONET;
15110 options -= XML_PARSE_NONET;
15111 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015112 if (options & XML_PARSE_COMPACT) {
15113 ctxt->options |= XML_PARSE_COMPACT;
15114 options -= XML_PARSE_COMPACT;
15115 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015116 if (options & XML_PARSE_OLD10) {
15117 ctxt->options |= XML_PARSE_OLD10;
15118 options -= XML_PARSE_OLD10;
15119 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015120 if (options & XML_PARSE_NOBASEFIX) {
15121 ctxt->options |= XML_PARSE_NOBASEFIX;
15122 options -= XML_PARSE_NOBASEFIX;
15123 }
15124 if (options & XML_PARSE_HUGE) {
15125 ctxt->options |= XML_PARSE_HUGE;
15126 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015127 if (ctxt->dict != NULL)
15128 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015129 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015130 if (options & XML_PARSE_OLDSAX) {
15131 ctxt->options |= XML_PARSE_OLDSAX;
15132 options -= XML_PARSE_OLDSAX;
15133 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015134 if (options & XML_PARSE_IGNORE_ENC) {
15135 ctxt->options |= XML_PARSE_IGNORE_ENC;
15136 options -= XML_PARSE_IGNORE_ENC;
15137 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015138 if (options & XML_PARSE_BIG_LINES) {
15139 ctxt->options |= XML_PARSE_BIG_LINES;
15140 options -= XML_PARSE_BIG_LINES;
15141 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015142 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015143 return (options);
15144}
15145
15146/**
Daniel Veillard37334572008-07-31 08:20:02 +000015147 * xmlCtxtUseOptions:
15148 * @ctxt: an XML parser context
15149 * @options: a combination of xmlParserOption
15150 *
15151 * Applies the options to the parser context
15152 *
15153 * Returns 0 in case of success, the set of unknown or unimplemented options
15154 * in case of error.
15155 */
15156int
15157xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15158{
15159 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15160}
15161
15162/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015163 * xmlDoRead:
15164 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015165 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015166 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015167 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015168 * @reuse: keep the context for reuse
15169 *
15170 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015171 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015172 * Returns the resulting document tree or NULL
15173 */
15174static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015175xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15176 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015177{
15178 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015179
15180 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015181 if (encoding != NULL) {
15182 xmlCharEncodingHandlerPtr hdlr;
15183
15184 hdlr = xmlFindCharEncodingHandler(encoding);
15185 if (hdlr != NULL)
15186 xmlSwitchToEncoding(ctxt, hdlr);
15187 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015188 if ((URL != NULL) && (ctxt->input != NULL) &&
15189 (ctxt->input->filename == NULL))
15190 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015191 xmlParseDocument(ctxt);
15192 if ((ctxt->wellFormed) || ctxt->recovery)
15193 ret = ctxt->myDoc;
15194 else {
15195 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015196 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015197 xmlFreeDoc(ctxt->myDoc);
15198 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015199 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015200 ctxt->myDoc = NULL;
15201 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015202 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015203 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015204
15205 return (ret);
15206}
15207
15208/**
15209 * xmlReadDoc:
15210 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015211 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015212 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015213 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015214 *
15215 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015216 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015217 * Returns the resulting document tree
15218 */
15219xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015220xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015221{
15222 xmlParserCtxtPtr ctxt;
15223
15224 if (cur == NULL)
15225 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015226 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015227
15228 ctxt = xmlCreateDocParserCtxt(cur);
15229 if (ctxt == NULL)
15230 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015231 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015232}
15233
15234/**
15235 * xmlReadFile:
15236 * @filename: a file or URL
15237 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015238 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015239 *
15240 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015241 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015242 * Returns the resulting document tree
15243 */
15244xmlDocPtr
15245xmlReadFile(const char *filename, const char *encoding, int options)
15246{
15247 xmlParserCtxtPtr ctxt;
15248
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015249 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015250 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015251 if (ctxt == NULL)
15252 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015253 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015254}
15255
15256/**
15257 * xmlReadMemory:
15258 * @buffer: a pointer to a char array
15259 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015260 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015261 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015262 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015263 *
15264 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015265 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015266 * Returns the resulting document tree
15267 */
15268xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015269xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015270{
15271 xmlParserCtxtPtr ctxt;
15272
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015273 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015274 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15275 if (ctxt == NULL)
15276 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015277 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015278}
15279
15280/**
15281 * xmlReadFd:
15282 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015283 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015284 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015285 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015286 *
15287 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015288 * NOTE that the file descriptor will not be closed when the
15289 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015290 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015291 * Returns the resulting document tree
15292 */
15293xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015294xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015295{
15296 xmlParserCtxtPtr ctxt;
15297 xmlParserInputBufferPtr input;
15298 xmlParserInputPtr stream;
15299
15300 if (fd < 0)
15301 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015302 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015303
15304 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15305 if (input == NULL)
15306 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015307 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015308 ctxt = xmlNewParserCtxt();
15309 if (ctxt == NULL) {
15310 xmlFreeParserInputBuffer(input);
15311 return (NULL);
15312 }
15313 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15314 if (stream == NULL) {
15315 xmlFreeParserInputBuffer(input);
15316 xmlFreeParserCtxt(ctxt);
15317 return (NULL);
15318 }
15319 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015320 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015321}
15322
15323/**
15324 * xmlReadIO:
15325 * @ioread: an I/O read function
15326 * @ioclose: an I/O close function
15327 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015328 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015329 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015330 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015331 *
15332 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015333 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015334 * Returns the resulting document tree
15335 */
15336xmlDocPtr
15337xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015338 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015339{
15340 xmlParserCtxtPtr ctxt;
15341 xmlParserInputBufferPtr input;
15342 xmlParserInputPtr stream;
15343
15344 if (ioread == NULL)
15345 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015346 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015347
15348 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15349 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015350 if (input == NULL) {
15351 if (ioclose != NULL)
15352 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015353 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015354 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015355 ctxt = xmlNewParserCtxt();
15356 if (ctxt == NULL) {
15357 xmlFreeParserInputBuffer(input);
15358 return (NULL);
15359 }
15360 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15361 if (stream == NULL) {
15362 xmlFreeParserInputBuffer(input);
15363 xmlFreeParserCtxt(ctxt);
15364 return (NULL);
15365 }
15366 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015367 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015368}
15369
15370/**
15371 * xmlCtxtReadDoc:
15372 * @ctxt: an XML parser context
15373 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015374 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015375 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015376 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015377 *
15378 * parse an XML in-memory document and build a tree.
15379 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015380 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015381 * Returns the resulting document tree
15382 */
15383xmlDocPtr
15384xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015385 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015386{
15387 xmlParserInputPtr stream;
15388
15389 if (cur == NULL)
15390 return (NULL);
15391 if (ctxt == NULL)
15392 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015393 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015394
15395 xmlCtxtReset(ctxt);
15396
15397 stream = xmlNewStringInputStream(ctxt, cur);
15398 if (stream == NULL) {
15399 return (NULL);
15400 }
15401 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015402 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015403}
15404
15405/**
15406 * xmlCtxtReadFile:
15407 * @ctxt: an XML parser context
15408 * @filename: a file or URL
15409 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015410 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015411 *
15412 * parse an XML file from the filesystem or the network.
15413 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015414 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015415 * Returns the resulting document tree
15416 */
15417xmlDocPtr
15418xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15419 const char *encoding, int options)
15420{
15421 xmlParserInputPtr stream;
15422
15423 if (filename == NULL)
15424 return (NULL);
15425 if (ctxt == NULL)
15426 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015427 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015428
15429 xmlCtxtReset(ctxt);
15430
Daniel Veillard29614c72004-11-26 10:47:26 +000015431 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015432 if (stream == NULL) {
15433 return (NULL);
15434 }
15435 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015436 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015437}
15438
15439/**
15440 * xmlCtxtReadMemory:
15441 * @ctxt: an XML parser context
15442 * @buffer: a pointer to a char array
15443 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015444 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015445 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015446 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015447 *
15448 * parse an XML in-memory document and build a tree.
15449 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015450 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015451 * Returns the resulting document tree
15452 */
15453xmlDocPtr
15454xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015455 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015456{
15457 xmlParserInputBufferPtr input;
15458 xmlParserInputPtr stream;
15459
15460 if (ctxt == NULL)
15461 return (NULL);
15462 if (buffer == NULL)
15463 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015464 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015465
15466 xmlCtxtReset(ctxt);
15467
15468 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15469 if (input == NULL) {
15470 return(NULL);
15471 }
15472
15473 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15474 if (stream == NULL) {
15475 xmlFreeParserInputBuffer(input);
15476 return(NULL);
15477 }
15478
15479 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015480 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015481}
15482
15483/**
15484 * xmlCtxtReadFd:
15485 * @ctxt: an XML parser context
15486 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015487 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015488 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015489 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015490 *
15491 * parse an XML from a file descriptor and build a tree.
15492 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015493 * NOTE that the file descriptor will not be closed when the
15494 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015495 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015496 * Returns the resulting document tree
15497 */
15498xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015499xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15500 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015501{
15502 xmlParserInputBufferPtr input;
15503 xmlParserInputPtr stream;
15504
15505 if (fd < 0)
15506 return (NULL);
15507 if (ctxt == NULL)
15508 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015509 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015510
15511 xmlCtxtReset(ctxt);
15512
15513
15514 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15515 if (input == NULL)
15516 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015517 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015518 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15519 if (stream == NULL) {
15520 xmlFreeParserInputBuffer(input);
15521 return (NULL);
15522 }
15523 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015524 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015525}
15526
15527/**
15528 * xmlCtxtReadIO:
15529 * @ctxt: an XML parser context
15530 * @ioread: an I/O read function
15531 * @ioclose: an I/O close function
15532 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015533 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015534 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015535 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015536 *
15537 * parse an XML document from I/O functions and source and build a tree.
15538 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015539 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015540 * Returns the resulting document tree
15541 */
15542xmlDocPtr
15543xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15544 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015545 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015546 const char *encoding, int options)
15547{
15548 xmlParserInputBufferPtr input;
15549 xmlParserInputPtr stream;
15550
15551 if (ioread == NULL)
15552 return (NULL);
15553 if (ctxt == NULL)
15554 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015555 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015556
15557 xmlCtxtReset(ctxt);
15558
15559 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15560 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015561 if (input == NULL) {
15562 if (ioclose != NULL)
15563 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015564 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015565 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015566 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15567 if (stream == NULL) {
15568 xmlFreeParserInputBuffer(input);
15569 return (NULL);
15570 }
15571 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015572 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015573}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015574
15575#define bottom_parser
15576#include "elfgcchack.h"