blob: 5813a6643e1580b89359066afbbb5fa164d5e5cc [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
Owen Taylor3473f882001-02-23 17:55:21 +000021 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Stéphane Michaut454e3972017-08-28 14:30:43 +020033/* To avoid EBCDIC trouble when parsing on zOS */
34#if defined(__MVS__)
35#pragma convert("ISO8859-1")
36#endif
37
Daniel Veillard34ce8be2002-03-18 19:37:11 +000038#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000039#include "libxml.h"
40
Nick Wellnhofere3890542017-10-09 00:20:01 +020041#if defined(_WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000042#define XML_DIR_SEP '\\'
43#else
Owen Taylor3473f882001-02-23 17:55:21 +000044#define XML_DIR_SEP '/'
45#endif
46
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080048#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000050#include <stdarg.h>
Nick Wellnhofer855c19e2017-06-01 01:04:08 +020051#include <stddef.h>
Owen Taylor3473f882001-02-23 17:55:21 +000052#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000053#include <libxml/threads.h>
54#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055#include <libxml/tree.h>
56#include <libxml/parser.h>
57#include <libxml/parserInternals.h>
58#include <libxml/valid.h>
59#include <libxml/entities.h>
60#include <libxml/xmlerror.h>
61#include <libxml/encoding.h>
62#include <libxml/xmlIO.h>
63#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000064#ifdef LIBXML_CATALOG_ENABLED
65#include <libxml/catalog.h>
66#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000067#ifdef LIBXML_SCHEMAS_ENABLED
68#include <libxml/xmlschemastypes.h>
69#include <libxml/relaxng.h>
70#endif
Owen Taylor3473f882001-02-23 17:55:21 +000071#ifdef HAVE_CTYPE_H
72#include <ctype.h>
73#endif
74#ifdef HAVE_STDLIB_H
75#include <stdlib.h>
76#endif
77#ifdef HAVE_SYS_STAT_H
78#include <sys/stat.h>
79#endif
80#ifdef HAVE_FCNTL_H
81#include <fcntl.h>
82#endif
83#ifdef HAVE_UNISTD_H
84#include <unistd.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080097static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98
Daniel Veillard0161e632008-08-28 15:36:32 +000099/************************************************************************
100 * *
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
102 * *
103 ************************************************************************/
104
105#define XML_PARSER_BIG_ENTITY 1000
106#define XML_PARSER_LOT_ENTITY 5000
107
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
114#define XML_PARSER_NON_LINEAR 10
115
116/*
117 * xmlParserEntityCheck
118 *
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential linear entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
123 * parser option.
124 */
125static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800126xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800127 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000128{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800129 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000130
131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132 return (0);
133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800135
136 /*
137 * This may look absurd but is needed to detect
138 * entities problems
139 */
140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillardbdd66182016-05-23 12:27:58 +0800141 (ent->content != NULL) && (ent->checked == 0) &&
142 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800143 unsigned long oldnbent = ctxt->nbentities;
144 xmlChar *rep;
145
146 ent->checked = 1;
147
Peter Simons8f30bdf2016-04-15 11:56:55 +0200148 ++ctxt->depth;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800149 rep = xmlStringDecodeEntities(ctxt, ent->content,
150 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +0200151 --ctxt->depth;
Nick Wellnhofer707ad082018-01-23 16:37:54 +0100152 if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbdd66182016-05-23 12:27:58 +0800153 ent->content[0] = 0;
154 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800155
156 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
157 if (rep != NULL) {
158 if (xmlStrchr(rep, '<'))
159 ent->checked |= 1;
160 xmlFree(rep);
161 rep = NULL;
162 }
163 }
Daniel Veillard23f05e02013-02-19 10:21:49 +0800164 if (replacement != 0) {
165 if (replacement < XML_MAX_TEXT_LENGTH)
166 return(0);
167
168 /*
169 * If the volume of entity copy reaches 10 times the
170 * amount of parsed data and over the large text threshold
171 * then that's very likely to be an abuse.
172 */
173 if (ctxt->input != NULL) {
174 consumed = ctxt->input->consumed +
175 (ctxt->input->cur - ctxt->input->base);
176 }
177 consumed += ctxt->sizeentities;
178
179 if (replacement < XML_PARSER_NON_LINEAR * consumed)
180 return(0);
181 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000182 /*
183 * Do the check based on the replacement size of the entity
184 */
185 if (size < XML_PARSER_BIG_ENTITY)
186 return(0);
187
188 /*
189 * A limit on the amount of text data reasonably used
190 */
191 if (ctxt->input != NULL) {
192 consumed = ctxt->input->consumed +
193 (ctxt->input->cur - ctxt->input->base);
194 }
195 consumed += ctxt->sizeentities;
196
197 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
198 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
199 return (0);
200 } else if (ent != NULL) {
201 /*
202 * use the number of parsed entities in the replacement
203 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800204 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000205
206 /*
207 * The amount of data parsed counting entities size only once
208 */
209 if (ctxt->input != NULL) {
210 consumed = ctxt->input->consumed +
211 (ctxt->input->cur - ctxt->input->base);
212 }
213 consumed += ctxt->sizeentities;
214
215 /*
216 * Check the density of entities for the amount of data
217 * knowing an entity reference will take at least 3 bytes
218 */
219 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
220 return (0);
221 } else {
222 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800223 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000224 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800225 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
226 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
227 (ctxt->nbentities <= 10000))
228 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000229 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000230 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
231 return (1);
232}
233
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents that we accept to
 * process; the default value is 256. This is a safety boundary feature
 * rather than a limitation of the parser, and it can be disabled with
 * the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000243
Daniel Veillard0fb18932003-09-07 09:14:37 +0000244
Daniel Veillard0161e632008-08-28 15:36:32 +0000245
246#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000247#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000248#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000249#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
250
Daniel Veillard1f972e92012-08-15 10:16:37 +0800251/**
252 * XML_PARSER_CHUNK_SIZE
253 *
254 * When calling GROW that's the minimal amount of data
255 * the parser expected to have received. It is not a hard
256 * limit but an optimization when reading strings like Names
257 * It is not strictly needed as long as inputs available characters
258 * are followed by 0, which should be provided by the I/O level
259 */
260#define XML_PARSER_CHUNK_SIZE 100
261
/*
 * NULL-terminated table of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
271
Daniel Veillarda07050d2003-10-19 14:46:32 +0000272
Owen Taylor3473f882001-02-23 17:55:21 +0000273/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200274static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
275 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000276
Daniel Veillard7d515752003-09-26 19:12:37 +0000277static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000278xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
279 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000280 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000281 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000282
Daniel Veillard37334572008-07-31 08:20:02 +0000283static int
284xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
285 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000286#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000287static void
288xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
289 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000290#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000291
Daniel Veillard7d515752003-09-26 19:12:37 +0000292static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000293xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
294 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000295
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000296static int
297xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
298
Daniel Veillarde57ec792003-09-10 10:50:59 +0000299/************************************************************************
300 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800301 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 * *
303 ************************************************************************/
304
305/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 * xmlErrAttributeDup:
307 * @ctxt: an XML parser context
308 * @prefix: the attribute prefix
309 * @localname: the attribute localname
310 *
311 * Handle a redefinition of attribute error
312 */
313static void
314xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
315 const xmlChar * localname)
316{
Daniel Veillard157fee02003-10-31 10:36:03 +0000317 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
318 (ctxt->instate == XML_PARSER_EOF))
319 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000320 if (ctxt != NULL)
321 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200322
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000323 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000324 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200325 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000326 (const char *) localname, NULL, NULL, 0, 0,
327 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000328 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000329 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200330 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 (const char *) prefix, (const char *) localname,
332 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
333 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000334 if (ctxt != NULL) {
335 ctxt->wellFormed = 0;
336 if (ctxt->recovery == 0)
337 ctxt->disableSAX = 1;
338 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339}
340
341/**
342 * xmlFatalErr:
343 * @ctxt: an XML parser context
344 * @error: the error number
345 * @extra: extra information string
346 *
347 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
348 */
349static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000350xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351{
352 const char *errmsg;
353
Daniel Veillard157fee02003-10-31 10:36:03 +0000354 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
355 (ctxt->instate == XML_PARSER_EOF))
356 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 switch (error) {
358 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800359 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800362 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800365 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 errmsg = "internal error";
369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800374 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800377 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800404 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800428 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800431 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800434 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000437 errmsg = "Fragment not allowed";
438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800440 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800443 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800446 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800449 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000451 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800452 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800455 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000457 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800458 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000460 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800462 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000464 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800465 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000466 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000467 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800468 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000470 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800471 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 break;
473 case XML_ERR_CONDSEC_INVALID_KEYWORD:
474 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800475 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000476 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800478 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000480 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000492 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000495 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800499 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000500 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000501 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800502 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000504 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800505 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000506 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000507 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800508 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000509 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000510 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800511 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000512 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000513 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800514 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000515 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000516 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800517 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000518 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000519 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800520 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000521 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000522 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800523 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000525 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800526 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000527 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800528 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800529 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800530 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000531#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000532 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800533 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000534 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000535#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000536 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800537 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000538 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000539 if (ctxt != NULL)
540 ctxt->errNo = error;
David Kilzer4472c3a2016-05-13 15:13:17 +0800541 if (info == NULL) {
542 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
543 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
544 errmsg);
545 } else {
546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
547 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
548 errmsg, info);
549 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000555}
556
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000557/**
558 * xmlFatalErrMsg:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800565static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000566xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000568{
Daniel Veillard157fee02003-10-31 10:36:03 +0000569 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
570 (ctxt->instate == XML_PARSER_EOF))
571 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000572 if (ctxt != NULL)
573 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200575 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000576 if (ctxt != NULL) {
577 ctxt->wellFormed = 0;
578 if (ctxt->recovery == 0)
579 ctxt->disableSAX = 1;
580 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581}
582
583/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000584 * xmlWarningMsg:
585 * @ctxt: an XML parser context
586 * @error: the error number
587 * @msg: the error message
588 * @str1: extra data
589 * @str2: extra data
590 *
591 * Handle a warning.
592 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800593static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000594xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg, const xmlChar *str1, const xmlChar *str2)
596{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000597 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000598
Daniel Veillard157fee02003-10-31 10:36:03 +0000599 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
600 (ctxt->instate == XML_PARSER_EOF))
601 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000602 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
603 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000604 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200605 if (ctxt != NULL) {
606 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000607 (ctxt->sax) ? ctxt->sax->warning : NULL,
608 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000609 ctxt, NULL, XML_FROM_PARSER, error,
610 XML_ERR_WARNING, NULL, 0,
611 (const char *) str1, (const char *) str2, NULL, 0, 0,
612 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200613 } else {
614 __xmlRaiseError(schannel, NULL, NULL,
615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
619 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000620}
621
622/**
623 * xmlValidityError:
624 * @ctxt: an XML parser context
625 * @error: the error number
626 * @msg: the error message
627 * @str1: extra data
628 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000629 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000630 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800631static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000632xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000633 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000634{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000635 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000636
637 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638 (ctxt->instate == XML_PARSER_EOF))
639 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000640 if (ctxt != NULL) {
641 ctxt->errNo = error;
642 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
643 schannel = ctxt->sax->serror;
644 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200645 if (ctxt != NULL) {
646 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000647 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000648 ctxt, NULL, XML_FROM_DTD, error,
649 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000650 (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000652 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200653 } else {
654 __xmlRaiseError(schannel, NULL, NULL,
655 ctxt, NULL, XML_FROM_DTD, error,
656 XML_ERR_ERROR, NULL, 0, (const char *) str1,
657 (const char *) str2, NULL, 0, 0,
658 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000659 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000660}
661
662/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000663 * xmlFatalErrMsgInt:
664 * @ctxt: an XML parser context
665 * @error: the error number
666 * @msg: the error message
667 * @val: an integer value
668 *
669 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
670 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800671static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000672xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000673 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000674{
Daniel Veillard157fee02003-10-31 10:36:03 +0000675 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676 (ctxt->instate == XML_PARSER_EOF))
677 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000678 if (ctxt != NULL)
679 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000680 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000681 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
682 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000683 if (ctxt != NULL) {
684 ctxt->wellFormed = 0;
685 if (ctxt->recovery == 0)
686 ctxt->disableSAX = 1;
687 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000688}
689
690/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000691 * xmlFatalErrMsgStrIntStr:
692 * @ctxt: an XML parser context
693 * @error: the error number
694 * @msg: the error message
695 * @str1: an string info
696 * @val: an integer value
697 * @str2: an string info
698 *
699 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
700 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800701static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000702xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800703 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000704 const xmlChar *str2)
705{
Daniel Veillard157fee02003-10-31 10:36:03 +0000706 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
707 (ctxt->instate == XML_PARSER_EOF))
708 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000709 if (ctxt != NULL)
710 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000711 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000712 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
713 NULL, 0, (const char *) str1, (const char *) str2,
714 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000715 if (ctxt != NULL) {
716 ctxt->wellFormed = 0;
717 if (ctxt->recovery == 0)
718 ctxt->disableSAX = 1;
719 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000720}
721
722/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000723 * xmlFatalErrMsgStr:
724 * @ctxt: an XML parser context
725 * @error: the error number
726 * @msg: the error message
727 * @val: a string value
728 *
729 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
730 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800731static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000732xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000733 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000734{
Daniel Veillard157fee02003-10-31 10:36:03 +0000735 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736 (ctxt->instate == XML_PARSER_EOF))
737 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000738 if (ctxt != NULL)
739 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000740 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000741 XML_FROM_PARSER, error, XML_ERR_FATAL,
742 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
743 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000744 if (ctxt != NULL) {
745 ctxt->wellFormed = 0;
746 if (ctxt->recovery == 0)
747 ctxt->disableSAX = 1;
748 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000749}
750
751/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000752 * xmlErrMsgStr:
753 * @ctxt: an XML parser context
754 * @error: the error number
755 * @msg: the error message
756 * @val: a string value
757 *
758 * Handle a non fatal parser error
759 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800760static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000761xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762 const char *msg, const xmlChar * val)
763{
Daniel Veillard157fee02003-10-31 10:36:03 +0000764 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765 (ctxt->instate == XML_PARSER_EOF))
766 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000767 if (ctxt != NULL)
768 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000769 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000770 XML_FROM_PARSER, error, XML_ERR_ERROR,
771 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
772 val);
773}
774
775/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000776 * xmlNsErr:
777 * @ctxt: an XML parser context
778 * @error: the error number
779 * @msg: the message
780 * @info1: extra information string
781 * @info2: extra information string
782 *
783 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
784 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800785static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000786xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
787 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000788 const xmlChar * info1, const xmlChar * info2,
789 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000790{
Daniel Veillard157fee02003-10-31 10:36:03 +0000791 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
792 (ctxt->instate == XML_PARSER_EOF))
793 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000794 if (ctxt != NULL)
795 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000796 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000797 XML_ERR_ERROR, NULL, 0, (const char *) info1,
798 (const char *) info2, (const char *) info3, 0, 0, msg,
799 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000800 if (ctxt != NULL)
801 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000802}
803
Daniel Veillard37334572008-07-31 08:20:02 +0000804/**
805 * xmlNsWarn
806 * @ctxt: an XML parser context
807 * @error: the error number
808 * @msg: the message
809 * @info1: extra information string
810 * @info2: extra information string
811 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800812 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000813 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800814static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard37334572008-07-31 08:20:02 +0000815xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
816 const char *msg,
817 const xmlChar * info1, const xmlChar * info2,
818 const xmlChar * info3)
819{
820 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821 (ctxt->instate == XML_PARSER_EOF))
822 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000823 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
824 XML_ERR_WARNING, NULL, 0, (const char *) info1,
825 (const char *) info2, (const char *) info3, 0, 0, msg,
826 info1, info2, info3);
827}
828
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000829/************************************************************************
830 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800831 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000832 * *
833 ************************************************************************/
834
835/**
836 * xmlHasFeature:
837 * @feature: the feature to be examined
838 *
839 * Examines if the library has been compiled with a given feature.
840 *
841 * Returns a non-zero value if the feature exist, otherwise zero.
842 * Returns zero (0) if the feature does not exist or an unknown
843 * unknown feature is requested, non-zero otherwise.
844 */
845int
846xmlHasFeature(xmlFeature feature)
847{
848 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000849 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000850#ifdef LIBXML_THREAD_ENABLED
851 return(1);
852#else
853 return(0);
854#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000855 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000856#ifdef LIBXML_TREE_ENABLED
857 return(1);
858#else
859 return(0);
860#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000861 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000862#ifdef LIBXML_OUTPUT_ENABLED
863 return(1);
864#else
865 return(0);
866#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000867 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000868#ifdef LIBXML_PUSH_ENABLED
869 return(1);
870#else
871 return(0);
872#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000873 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000874#ifdef LIBXML_READER_ENABLED
875 return(1);
876#else
877 return(0);
878#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000879 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000880#ifdef LIBXML_PATTERN_ENABLED
881 return(1);
882#else
883 return(0);
884#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000885 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000886#ifdef LIBXML_WRITER_ENABLED
887 return(1);
888#else
889 return(0);
890#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000891 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000892#ifdef LIBXML_SAX1_ENABLED
893 return(1);
894#else
895 return(0);
896#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000897 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000898#ifdef LIBXML_FTP_ENABLED
899 return(1);
900#else
901 return(0);
902#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000903 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000904#ifdef LIBXML_HTTP_ENABLED
905 return(1);
906#else
907 return(0);
908#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000909 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000910#ifdef LIBXML_VALID_ENABLED
911 return(1);
912#else
913 return(0);
914#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000915 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000916#ifdef LIBXML_HTML_ENABLED
917 return(1);
918#else
919 return(0);
920#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000921 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000922#ifdef LIBXML_LEGACY_ENABLED
923 return(1);
924#else
925 return(0);
926#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000927 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000928#ifdef LIBXML_C14N_ENABLED
929 return(1);
930#else
931 return(0);
932#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000933 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000934#ifdef LIBXML_CATALOG_ENABLED
935 return(1);
936#else
937 return(0);
938#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000939 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000940#ifdef LIBXML_XPATH_ENABLED
941 return(1);
942#else
943 return(0);
944#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000945 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000946#ifdef LIBXML_XPTR_ENABLED
947 return(1);
948#else
949 return(0);
950#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000951 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000952#ifdef LIBXML_XINCLUDE_ENABLED
953 return(1);
954#else
955 return(0);
956#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000957 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000958#ifdef LIBXML_ICONV_ENABLED
959 return(1);
960#else
961 return(0);
962#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000963 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000964#ifdef LIBXML_ISO8859X_ENABLED
965 return(1);
966#else
967 return(0);
968#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000969 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000970#ifdef LIBXML_UNICODE_ENABLED
971 return(1);
972#else
973 return(0);
974#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000975 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000976#ifdef LIBXML_REGEXP_ENABLED
977 return(1);
978#else
979 return(0);
980#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000981 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000982#ifdef LIBXML_AUTOMATA_ENABLED
983 return(1);
984#else
985 return(0);
986#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000987 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000988#ifdef LIBXML_EXPR_ENABLED
989 return(1);
990#else
991 return(0);
992#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000993 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000994#ifdef LIBXML_SCHEMAS_ENABLED
995 return(1);
996#else
997 return(0);
998#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000999 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001000#ifdef LIBXML_SCHEMATRON_ENABLED
1001 return(1);
1002#else
1003 return(0);
1004#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001005 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001006#ifdef LIBXML_MODULES_ENABLED
1007 return(1);
1008#else
1009 return(0);
1010#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001011 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001012#ifdef LIBXML_DEBUG_ENABLED
1013 return(1);
1014#else
1015 return(0);
1016#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001017 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001018#ifdef DEBUG_MEMORY_LOCATION
1019 return(1);
1020#else
1021 return(0);
1022#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001023 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001024#ifdef LIBXML_DEBUG_RUNTIME
1025 return(1);
1026#else
1027 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001028#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001029 case XML_WITH_ZLIB:
1030#ifdef LIBXML_ZLIB_ENABLED
1031 return(1);
1032#else
1033 return(0);
1034#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001035 case XML_WITH_LZMA:
1036#ifdef LIBXML_LZMA_ENABLED
1037 return(1);
1038#else
1039 return(0);
1040#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001041 case XML_WITH_ICU:
1042#ifdef LIBXML_ICU_ENABLED
1043 return(1);
1044#else
1045 return(0);
1046#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001047 default:
1048 break;
1049 }
1050 return(0);
1051}
1052
1053/************************************************************************
1054 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001055 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056 * *
1057 ************************************************************************/
1058
1059/**
1060 * xmlDetectSAX2:
1061 * @ctxt: an XML parser context
1062 *
1063 * Do the SAX2 detection and specific intialization
1064 */
1065static void
1066xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1067 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001068#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001069 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1070 ((ctxt->sax->startElementNs != NULL) ||
1071 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001072#else
1073 ctxt->sax2 = 1;
1074#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001075
1076 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1077 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1078 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001079 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1080 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001081 xmlErrMemory(ctxt, NULL);
1082 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001083}
1084
Daniel Veillarde57ec792003-09-10 10:50:59 +00001085typedef struct _xmlDefAttrs xmlDefAttrs;
1086typedef xmlDefAttrs *xmlDefAttrsPtr;
1087struct _xmlDefAttrs {
1088 int nbAttrs; /* number of defaulted attributes on that element */
1089 int maxAttrs; /* the size of the array */
Nick Wellnhofer47496722017-05-31 16:46:39 +02001090#if __STDC_VERSION__ >= 199901L
1091 /* Using a C99 flexible array member avoids UBSan errors. */
1092 const xmlChar *values[]; /* array of localname/prefix/values/external */
1093#else
1094 const xmlChar *values[5];
1095#endif
Daniel Veillarde57ec792003-09-10 10:50:59 +00001096};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001097
1098/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001099 * xmlAttrNormalizeSpace:
1100 * @src: the source string
1101 * @dst: the target string
1102 *
1103 * Normalize the space in non CDATA attribute values:
1104 * If the attribute type is not CDATA, then the XML processor MUST further
1105 * process the normalized attribute value by discarding any leading and
1106 * trailing space (#x20) characters, and by replacing sequences of space
1107 * (#x20) characters by a single space (#x20) character.
1108 * Note that the size of dst need to be at least src, and if one doesn't need
1109 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1110 * passing src as dst is just fine.
1111 *
1112 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1113 * is needed.
1114 */
1115static xmlChar *
1116xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1117{
1118 if ((src == NULL) || (dst == NULL))
1119 return(NULL);
1120
1121 while (*src == 0x20) src++;
1122 while (*src != 0) {
1123 if (*src == 0x20) {
1124 while (*src == 0x20) src++;
1125 if (*src != 0)
1126 *dst++ = 0x20;
1127 } else {
1128 *dst++ = *src++;
1129 }
1130 }
1131 *dst = 0;
1132 if (dst == src)
1133 return(NULL);
1134 return(dst);
1135}
1136
1137/**
1138 * xmlAttrNormalizeSpace2:
1139 * @src: the source string
1140 *
1141 * Normalize the space in non CDATA attribute values, a slightly more complex
1142 * front end to avoid allocation problems when running on attribute values
1143 * coming from the input.
1144 *
1145 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1146 * is needed.
1147 */
1148static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001149xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001150{
1151 int i;
1152 int remove_head = 0;
1153 int need_realloc = 0;
1154 const xmlChar *cur;
1155
1156 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1157 return(NULL);
1158 i = *len;
1159 if (i <= 0)
1160 return(NULL);
1161
1162 cur = src;
1163 while (*cur == 0x20) {
1164 cur++;
1165 remove_head++;
1166 }
1167 while (*cur != 0) {
1168 if (*cur == 0x20) {
1169 cur++;
1170 if ((*cur == 0x20) || (*cur == 0)) {
1171 need_realloc = 1;
1172 break;
1173 }
1174 } else
1175 cur++;
1176 }
1177 if (need_realloc) {
1178 xmlChar *ret;
1179
1180 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1181 if (ret == NULL) {
1182 xmlErrMemory(ctxt, NULL);
1183 return(NULL);
1184 }
1185 xmlAttrNormalizeSpace(ret, ret);
1186 *len = (int) strlen((const char *)ret);
1187 return(ret);
1188 } else if (remove_head) {
1189 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001190 memmove(src, src + remove_head, 1 + *len);
1191 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001192 }
1193 return(NULL);
1194}
1195
1196/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001197 * xmlAddDefAttrs:
1198 * @ctxt: an XML parser context
1199 * @fullname: the element fullname
1200 * @fullattr: the attribute fullname
1201 * @value: the attribute value
1202 *
1203 * Add a defaulted attribute for an element
1204 */
1205static void
1206xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1207 const xmlChar *fullname,
1208 const xmlChar *fullattr,
1209 const xmlChar *value) {
1210 xmlDefAttrsPtr defaults;
1211 int len;
1212 const xmlChar *name;
1213 const xmlChar *prefix;
1214
Daniel Veillard6a31b832008-03-26 14:06:44 +00001215 /*
1216 * Allows to detect attribute redefinitions
1217 */
1218 if (ctxt->attsSpecial != NULL) {
1219 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1220 return;
1221 }
1222
Daniel Veillarde57ec792003-09-10 10:50:59 +00001223 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001224 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001225 if (ctxt->attsDefault == NULL)
1226 goto mem_error;
1227 }
1228
1229 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001230 * split the element name into prefix:localname , the string found
1231 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001232 */
1233 name = xmlSplitQName3(fullname, &len);
1234 if (name == NULL) {
1235 name = xmlDictLookup(ctxt->dict, fullname, -1);
1236 prefix = NULL;
1237 } else {
1238 name = xmlDictLookup(ctxt->dict, name, -1);
1239 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1240 }
1241
1242 /*
1243 * make sure there is some storage
1244 */
1245 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1246 if (defaults == NULL) {
1247 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001248 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001249 if (defaults == NULL)
1250 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001251 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001252 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001253 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1254 defaults, NULL) < 0) {
1255 xmlFree(defaults);
1256 goto mem_error;
1257 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001258 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001259 xmlDefAttrsPtr temp;
1260
1261 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001262 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001263 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001264 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001265 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001266 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001267 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1268 defaults, NULL) < 0) {
1269 xmlFree(defaults);
1270 goto mem_error;
1271 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001272 }
1273
1274 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001275 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001276 * are within the DTD and hen not associated to namespace names.
1277 */
1278 name = xmlSplitQName3(fullattr, &len);
1279 if (name == NULL) {
1280 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1281 prefix = NULL;
1282 } else {
1283 name = xmlDictLookup(ctxt->dict, name, -1);
1284 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1285 }
1286
Daniel Veillardae0765b2008-07-31 19:54:59 +00001287 defaults->values[5 * defaults->nbAttrs] = name;
1288 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001289 /* intern the string and precompute the end */
1290 len = xmlStrlen(value);
1291 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001292 defaults->values[5 * defaults->nbAttrs + 2] = value;
1293 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1294 if (ctxt->external)
1295 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1296 else
1297 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001298 defaults->nbAttrs++;
1299
1300 return;
1301
1302mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001303 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001304 return;
1305}
1306
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001307/**
1308 * xmlAddSpecialAttr:
1309 * @ctxt: an XML parser context
1310 * @fullname: the element fullname
1311 * @fullattr: the attribute fullname
1312 * @type: the attribute type
1313 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001314 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001315 */
1316static void
1317xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1318 const xmlChar *fullname,
1319 const xmlChar *fullattr,
1320 int type)
1321{
1322 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001323 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001324 if (ctxt->attsSpecial == NULL)
1325 goto mem_error;
1326 }
1327
Daniel Veillardac4118d2008-01-11 05:27:32 +00001328 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1329 return;
1330
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001331 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
Nick Wellnhoferd422b952017-10-09 13:37:42 +02001332 (void *) (ptrdiff_t) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001333 return;
1334
1335mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001336 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001337 return;
1338}
1339
Daniel Veillard4432df22003-09-28 18:58:27 +00001340/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001341 * xmlCleanSpecialAttrCallback:
1342 *
1343 * Removes CDATA attributes from the special attribute table
1344 */
1345static void
1346xmlCleanSpecialAttrCallback(void *payload, void *data,
1347 const xmlChar *fullname, const xmlChar *fullattr,
1348 const xmlChar *unused ATTRIBUTE_UNUSED) {
1349 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1350
Nick Wellnhoferd422b952017-10-09 13:37:42 +02001351 if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001352 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1353 }
1354}
1355
1356/**
1357 * xmlCleanSpecialAttr:
1358 * @ctxt: an XML parser context
1359 *
1360 * Trim the list of attributes defined to remove all those of type
1361 * CDATA as they are not special. This call should be done when finishing
1362 * to parse the DTD and before starting to parse the document root.
1363 */
1364static void
1365xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1366{
1367 if (ctxt->attsSpecial == NULL)
1368 return;
1369
1370 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1371
1372 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1373 xmlHashFree(ctxt->attsSpecial, NULL);
1374 ctxt->attsSpecial = NULL;
1375 }
1376 return;
1377}
1378
1379/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001380 * xmlCheckLanguageID:
1381 * @lang: pointer to the string value
1382 *
1383 * Checks that the value conforms to the LanguageID production:
1384 *
1385 * NOTE: this is somewhat deprecated, those productions were removed from
1386 * the XML Second edition.
1387 *
1388 * [33] LanguageID ::= Langcode ('-' Subcode)*
1389 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1390 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1391 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1392 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1393 * [38] Subcode ::= ([a-z] | [A-Z])+
1394 *
 * The current REC references the successors of RFC 1766, currently RFC 5646
1396 *
1397 * http://www.rfc-editor.org/rfc/rfc5646.txt
1398 * langtag = language
1399 * ["-" script]
1400 * ["-" region]
1401 * *("-" variant)
1402 * *("-" extension)
1403 * ["-" privateuse]
1404 * language = 2*3ALPHA ; shortest ISO 639 code
1405 * ["-" extlang] ; sometimes followed by
1406 * ; extended language subtags
1407 * / 4ALPHA ; or reserved for future use
1408 * / 5*8ALPHA ; or registered language subtag
1409 *
1410 * extlang = 3ALPHA ; selected ISO 639 codes
1411 * *2("-" 3ALPHA) ; permanently reserved
1412 *
1413 * script = 4ALPHA ; ISO 15924 code
1414 *
1415 * region = 2ALPHA ; ISO 3166-1 code
1416 * / 3DIGIT ; UN M.49 code
1417 *
1418 * variant = 5*8alphanum ; registered variants
1419 * / (DIGIT 3alphanum)
1420 *
1421 * extension = singleton 1*("-" (2*8alphanum))
1422 *
1423 * ; Single alphanumerics
1424 * ; "x" reserved for private use
1425 * singleton = DIGIT ; 0 - 9
1426 * / %x41-57 ; A - W
1427 * / %x59-5A ; Y - Z
1428 * / %x61-77 ; a - w
1429 * / %x79-7A ; y - z
1430 *
1431 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1432 * The parser below doesn't try to cope with extension or privateuse
1433 * that could be added but that's not interoperable anyway
1434 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001435 * Returns 1 if correct 0 otherwise
1436 **/
1437int
1438xmlCheckLanguageID(const xmlChar * lang)
1439{
Daniel Veillard60587d62010-11-04 15:16:27 +01001440 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001441
1442 if (cur == NULL)
1443 return (0);
1444 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001445 ((cur[0] == 'I') && (cur[1] == '-')) ||
1446 ((cur[0] == 'x') && (cur[1] == '-')) ||
1447 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001448 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001449 * Still allow IANA code and user code which were coming
1450 * from the previous version of the XML-1.0 specification
1451 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001452 */
1453 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001454 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001455 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1456 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001457 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001458 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001459 nxt = cur;
1460 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1461 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1462 nxt++;
1463 if (nxt - cur >= 4) {
1464 /*
1465 * Reserved
1466 */
1467 if ((nxt - cur > 8) || (nxt[0] != 0))
1468 return(0);
1469 return(1);
1470 }
1471 if (nxt - cur < 2)
1472 return(0);
1473 /* we got an ISO 639 code */
1474 if (nxt[0] == 0)
1475 return(1);
1476 if (nxt[0] != '-')
1477 return(0);
1478
1479 nxt++;
1480 cur = nxt;
1481 /* now we can have extlang or script or region or variant */
1482 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1483 goto region_m49;
1484
1485 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1486 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1487 nxt++;
1488 if (nxt - cur == 4)
1489 goto script;
1490 if (nxt - cur == 2)
1491 goto region;
1492 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1493 goto variant;
1494 if (nxt - cur != 3)
1495 return(0);
1496 /* we parsed an extlang */
1497 if (nxt[0] == 0)
1498 return(1);
1499 if (nxt[0] != '-')
1500 return(0);
1501
1502 nxt++;
1503 cur = nxt;
1504 /* now we can have script or region or variant */
1505 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1506 goto region_m49;
1507
1508 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510 nxt++;
1511 if (nxt - cur == 2)
1512 goto region;
1513 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1514 goto variant;
1515 if (nxt - cur != 4)
1516 return(0);
1517 /* we parsed a script */
1518script:
1519 if (nxt[0] == 0)
1520 return(1);
1521 if (nxt[0] != '-')
1522 return(0);
1523
1524 nxt++;
1525 cur = nxt;
1526 /* now we can have region or variant */
1527 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1528 goto region_m49;
1529
1530 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1531 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1532 nxt++;
1533
1534 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1535 goto variant;
1536 if (nxt - cur != 2)
1537 return(0);
1538 /* we parsed a region */
1539region:
1540 if (nxt[0] == 0)
1541 return(1);
1542 if (nxt[0] != '-')
1543 return(0);
1544
1545 nxt++;
1546 cur = nxt;
1547 /* now we can just have a variant */
1548 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1549 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1550 nxt++;
1551
1552 if ((nxt - cur < 5) || (nxt - cur > 8))
1553 return(0);
1554
1555 /* we parsed a variant */
1556variant:
1557 if (nxt[0] == 0)
1558 return(1);
1559 if (nxt[0] != '-')
1560 return(0);
1561 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001562 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001563
1564region_m49:
1565 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1566 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1567 nxt += 3;
1568 goto region;
1569 }
1570 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001571}
1572
Owen Taylor3473f882001-02-23 17:55:21 +00001573/************************************************************************
1574 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001575 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001576 * *
1577 ************************************************************************/
1578
Daniel Veillard8ed10722009-08-20 19:17:36 +02001579static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1580 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001581
Daniel Veillard0fb18932003-09-07 09:14:37 +00001582#ifdef SAX2
1583/**
1584 * nsPush:
1585 * @ctxt: an XML parser context
1586 * @prefix: the namespace prefix or NULL
1587 * @URL: the namespace name
1588 *
1589 * Pushes a new parser namespace on top of the ns stack
1590 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001591 * Returns -1 in case of error, -2 if the namespace should be discarded
1592 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001593 */
1594static int
1595nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1596{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001597 if (ctxt->options & XML_PARSE_NSCLEAN) {
1598 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001599 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001600 if (ctxt->nsTab[i] == prefix) {
1601 /* in scope */
1602 if (ctxt->nsTab[i + 1] == URL)
1603 return(-2);
1604 /* out of scope keep it */
1605 break;
1606 }
1607 }
1608 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001609 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1610 ctxt->nsMax = 10;
1611 ctxt->nsNr = 0;
1612 ctxt->nsTab = (const xmlChar **)
1613 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1614 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001615 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001616 ctxt->nsMax = 0;
1617 return (-1);
1618 }
1619 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001620 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001621 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001622 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1623 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1624 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001625 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001626 ctxt->nsMax /= 2;
1627 return (-1);
1628 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001629 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001630 }
1631 ctxt->nsTab[ctxt->nsNr++] = prefix;
1632 ctxt->nsTab[ctxt->nsNr++] = URL;
1633 return (ctxt->nsNr);
1634}
1635/**
1636 * nsPop:
1637 * @ctxt: an XML parser context
1638 * @nr: the number to pop
1639 *
1640 * Pops the top @nr parser prefix/namespace from the ns stack
1641 *
1642 * Returns the number of namespaces removed
1643 */
1644static int
1645nsPop(xmlParserCtxtPtr ctxt, int nr)
1646{
1647 int i;
1648
1649 if (ctxt->nsTab == NULL) return(0);
1650 if (ctxt->nsNr < nr) {
1651 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1652 nr = ctxt->nsNr;
1653 }
1654 if (ctxt->nsNr <= 0)
1655 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001656
Daniel Veillard0fb18932003-09-07 09:14:37 +00001657 for (i = 0;i < nr;i++) {
1658 ctxt->nsNr--;
1659 ctxt->nsTab[ctxt->nsNr] = NULL;
1660 }
1661 return(nr);
1662}
1663#endif
1664
1665static int
1666xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1667 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001668 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001669 int maxatts;
1670
1671 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001672 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001673 atts = (const xmlChar **)
1674 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001675 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001676 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001677 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1678 if (attallocs == NULL) goto mem_error;
1679 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001680 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001681 } else if (nr + 5 > ctxt->maxatts) {
1682 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001683 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1684 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001685 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001686 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001687 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1688 (maxatts / 5) * sizeof(int));
1689 if (attallocs == NULL) goto mem_error;
1690 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001691 ctxt->maxatts = maxatts;
1692 }
1693 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001694mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001695 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001696 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001697}
1698
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001699/**
1700 * inputPush:
1701 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001702 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001703 *
1704 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001705 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001706 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001707 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001708int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001709inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1710{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001711 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001712 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001713 if (ctxt->inputNr >= ctxt->inputMax) {
1714 ctxt->inputMax *= 2;
1715 ctxt->inputTab =
1716 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1717 ctxt->inputMax *
1718 sizeof(ctxt->inputTab[0]));
1719 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001720 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001721 xmlFreeInputStream(value);
1722 ctxt->inputMax /= 2;
1723 value = NULL;
1724 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001725 }
1726 }
1727 ctxt->inputTab[ctxt->inputNr] = value;
1728 ctxt->input = value;
1729 return (ctxt->inputNr++);
1730}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001731/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001732 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001733 * @ctxt: an XML parser context
1734 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001736 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001737 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001738 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001739xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001740inputPop(xmlParserCtxtPtr ctxt)
1741{
1742 xmlParserInputPtr ret;
1743
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001744 if (ctxt == NULL)
1745 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001746 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001747 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001748 ctxt->inputNr--;
1749 if (ctxt->inputNr > 0)
1750 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1751 else
1752 ctxt->input = NULL;
1753 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001754 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001755 return (ret);
1756}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001757/**
1758 * nodePush:
1759 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001760 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001761 *
1762 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001763 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001764 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001765 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001766int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001767nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1768{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001769 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001770 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001771 xmlNodePtr *tmp;
1772
1773 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1774 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001775 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001776 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001777 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001778 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001779 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001780 ctxt->nodeTab = tmp;
1781 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001782 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001783 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1784 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001785 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001786 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001787 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001788 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001789 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001790 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001791 ctxt->nodeTab[ctxt->nodeNr] = value;
1792 ctxt->node = value;
1793 return (ctxt->nodeNr++);
1794}
Daniel Veillard8915c152008-08-26 13:05:34 +00001795
Daniel Veillard1c732d22002-11-30 11:22:59 +00001796/**
1797 * nodePop:
1798 * @ctxt: an XML parser context
1799 *
1800 * Pops the top element node from the node stack
1801 *
1802 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001803 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001804xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001805nodePop(xmlParserCtxtPtr ctxt)
1806{
1807 xmlNodePtr ret;
1808
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001809 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001810 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001811 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001812 ctxt->nodeNr--;
1813 if (ctxt->nodeNr > 0)
1814 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1815 else
1816 ctxt->node = NULL;
1817 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001818 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001819 return (ret);
1820}
Daniel Veillarda2351322004-06-27 12:08:10 +00001821
1822#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001823/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001824 * nameNsPush:
1825 * @ctxt: an XML parser context
1826 * @value: the element name
1827 * @prefix: the element prefix
1828 * @URI: the element namespace name
1829 *
1830 * Pushes a new element name/prefix/URL on top of the name stack
1831 *
1832 * Returns -1 in case of error, the index in the stack otherwise
1833 */
1834static int
1835nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1836 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1837{
1838 if (ctxt->nameNr >= ctxt->nameMax) {
1839 const xmlChar * *tmp;
1840 void **tmp2;
1841 ctxt->nameMax *= 2;
1842 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1843 ctxt->nameMax *
1844 sizeof(ctxt->nameTab[0]));
1845 if (tmp == NULL) {
1846 ctxt->nameMax /= 2;
1847 goto mem_error;
1848 }
1849 ctxt->nameTab = tmp;
1850 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1851 ctxt->nameMax * 3 *
1852 sizeof(ctxt->pushTab[0]));
1853 if (tmp2 == NULL) {
1854 ctxt->nameMax /= 2;
1855 goto mem_error;
1856 }
1857 ctxt->pushTab = tmp2;
1858 }
1859 ctxt->nameTab[ctxt->nameNr] = value;
1860 ctxt->name = value;
1861 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1862 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Nick Wellnhoferd422b952017-10-09 13:37:42 +02001863 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (ptrdiff_t) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001864 return (ctxt->nameNr++);
1865mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001866 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001867 return (-1);
1868}
1869/**
1870 * nameNsPop:
1871 * @ctxt: an XML parser context
1872 *
1873 * Pops the top element/prefix/URI name from the name stack
1874 *
1875 * Returns the name just removed
1876 */
1877static const xmlChar *
1878nameNsPop(xmlParserCtxtPtr ctxt)
1879{
1880 const xmlChar *ret;
1881
1882 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001883 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001884 ctxt->nameNr--;
1885 if (ctxt->nameNr > 0)
1886 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1887 else
1888 ctxt->name = NULL;
1889 ret = ctxt->nameTab[ctxt->nameNr];
1890 ctxt->nameTab[ctxt->nameNr] = NULL;
1891 return (ret);
1892}
Daniel Veillarda2351322004-06-27 12:08:10 +00001893#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001894
1895/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001896 * namePush:
1897 * @ctxt: an XML parser context
1898 * @value: the element name
1899 *
1900 * Pushes a new element name on top of the name stack
1901 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001902 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001903 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001904int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001905namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001906{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001907 if (ctxt == NULL) return (-1);
1908
Daniel Veillard1c732d22002-11-30 11:22:59 +00001909 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001910 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001911 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001912 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001913 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001914 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001915 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001916 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001917 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001918 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001919 }
1920 ctxt->nameTab[ctxt->nameNr] = value;
1921 ctxt->name = value;
1922 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001923mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001924 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001925 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001926}
1927/**
1928 * namePop:
1929 * @ctxt: an XML parser context
1930 *
1931 * Pops the top element name from the name stack
1932 *
1933 * Returns the name just removed
1934 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001935const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001936namePop(xmlParserCtxtPtr ctxt)
1937{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001938 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001939
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001940 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1941 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001942 ctxt->nameNr--;
1943 if (ctxt->nameNr > 0)
1944 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1945 else
1946 ctxt->name = NULL;
1947 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001948 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001949 return (ret);
1950}
Owen Taylor3473f882001-02-23 17:55:21 +00001951
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001952static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001953 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001954 int *tmp;
1955
Owen Taylor3473f882001-02-23 17:55:21 +00001956 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001957 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1958 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1959 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001960 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001961 ctxt->spaceMax /=2;
1962 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001963 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001964 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001965 }
1966 ctxt->spaceTab[ctxt->spaceNr] = val;
1967 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1968 return(ctxt->spaceNr++);
1969}
1970
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001971static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001972 int ret;
1973 if (ctxt->spaceNr <= 0) return(0);
1974 ctxt->spaceNr--;
1975 if (ctxt->spaceNr > 0)
1976 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1977 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001978 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001979 ret = ctxt->spaceTab[ctxt->spaceNr];
1980 ctxt->spaceTab[ctxt->spaceNr] = -1;
1981 return(ret);
1982}
1983
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2018
/* Accessors on the current input; they all expect a variable named ctxt. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes found at s are
 * c1 ... cn. The bytes are compared left to right and the comparison
 * stops at the first mismatch. Every parameter is parenthesized so that
 * an expression argument (for instance "ptr + 1") is converted and
 * compared as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2042
/*
 * SKIP(val): advance the input by val bytes, adding val to the byte and
 * column counters; the line counter is not touched. More input is
 * requested when the new position holds a 0 byte.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the input by val bytes one at a time, keeping the
 * line and column counters accurate across '\n'. More input is
 * requested when the new position holds a 0 byte.
 * val is parenthesized in the loop test so that an argument such as
 * "cond ? a : b" bounds the loop as a whole instead of being merged
 * into the comparison.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2061
Daniel Veillarda880b122003-04-21 21:36:41 +00002062#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002063 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2064 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002065 xmlSHRINK (ctxt);
2066
2067static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2068 xmlParserInputShrink(ctxt->input);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002069 if (*ctxt->input->cur == 0)
2070 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2071}
Owen Taylor3473f882001-02-23 17:55:21 +00002072
Daniel Veillarda880b122003-04-21 21:36:41 +00002073#define GROW if ((ctxt->progressive == 0) && \
2074 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002075 xmlGROW (ctxt);
2076
2077static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002078 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2079 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2080
2081 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2082 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Vlad Tsyrklevich28f52fe2017-08-10 15:08:48 -07002083 ((ctxt->input->buf) &&
2084 (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002085 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2086 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002087 xmlHaltParser(ctxt);
2088 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002089 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002090 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002091 if ((ctxt->input->cur > ctxt->input->end) ||
2092 (ctxt->input->cur < ctxt->input->base)) {
2093 xmlHaltParser(ctxt);
2094 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2095 return;
2096 }
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002097 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2098 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillarda880b122003-04-21 21:36:41 +00002099}
Owen Taylor3473f882001-02-23 17:55:21 +00002100
/* Shorthands for the character-level helpers, applied to ctxt. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the input by one byte, bumping the column and byte
 * counters; the line counter is not touched. More input is requested
 * when the new position holds a 0 byte.
 * The expansion is a brace block, not a do/while(0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance the input by l bytes, counting them as a single
 * column, or as a line break when the current byte is '\n'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* l must be an lvalue: its address is handed to the decoding routine. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store value v in buffer b at index i and advance
 * i: a single byte when l is 1, otherwise whatever
 * xmlCopyCharMultiByte() writes.
 * The expansion is a bare if/else statement without trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002126
2127/**
2128 * xmlSkipBlankChars:
2129 * @ctxt: the XML parser context
2130 *
2131 * skip all blanks character found at that point in the input streams.
2132 * It pops up finished entities in the process if allowable at that point.
2133 *
2134 * Returns the number of space chars skipped
2135 */
2136
2137int
2138xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002139 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002140
2141 /*
2142 * It's Okay to use CUR/NEXT here since all the blanks are on
2143 * the ASCII range.
2144 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002145 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2146 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002147 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002148 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002149 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002150 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002151 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002152 if (*cur == '\n') {
2153 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002154 } else {
2155 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002156 }
2157 cur++;
2158 res++;
2159 if (*cur == 0) {
2160 ctxt->input->cur = cur;
2161 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2162 cur = ctxt->input->cur;
2163 }
2164 }
2165 ctxt->input->cur = cur;
2166 } else {
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002167 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2168
2169 while (1) {
2170 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002171 NEXT;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002172 } else if (CUR == '%') {
2173 /*
2174 * Need to handle support of entities branching here
2175 */
2176 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2177 break;
2178 xmlParsePEReference(ctxt);
2179 } else if (CUR == 0) {
2180 if (ctxt->inputNr <= 1)
2181 break;
2182 xmlPopInput(ctxt);
2183 } else {
2184 break;
2185 }
Nick Wellnhofer872fea92017-06-19 00:24:12 +02002186
2187 /*
2188 * Also increase the counter when entering or exiting a PERef.
2189 * The spec says: "When a parameter-entity reference is recognized
2190 * in the DTD and included, its replacement text MUST be enlarged
2191 * by the attachment of one leading and one following space (#x20)
2192 * character."
2193 */
2194 res++;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002195 }
Daniel Veillard02141ea2001-04-30 11:46:40 +00002196 }
Owen Taylor3473f882001-02-23 17:55:21 +00002197 return(res);
2198}
2199
2200/************************************************************************
2201 * *
2202 * Commodity functions to handle entities *
2203 * *
2204 ************************************************************************/
2205
2206/**
2207 * xmlPopInput:
2208 * @ctxt: an XML parser context
2209 *
2210 * xmlPopInput: the current input pointed by ctxt->input came to an end
2211 * pop it and return the next char.
2212 *
2213 * Returns the current xmlChar in the parser context
2214 */
2215xmlChar
2216xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002217 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002218 if (xmlParserDebugEntities)
2219 xmlGenericError(xmlGenericErrorContext,
2220 "Popping input %d\n", ctxt->inputNr);
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02002221 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2222 (ctxt->instate != XML_PARSER_EOF))
2223 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2224 "Unfinished entity outside the DTD");
Owen Taylor3473f882001-02-23 17:55:21 +00002225 xmlFreeInputStream(inputPop(ctxt));
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002226 if (*ctxt->input->cur == 0)
2227 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00002228 return(CUR);
2229}
2230
2231/**
2232 * xmlPushInput:
2233 * @ctxt: an XML parser context
2234 * @input: an XML parser input fragment (entity, XML fragment ...).
2235 *
2236 * xmlPushInput: switch to a new input stream which is stacked on top
2237 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002238 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002239 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002240int
Owen Taylor3473f882001-02-23 17:55:21 +00002241xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002242 int ret;
2243 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002244
2245 if (xmlParserDebugEntities) {
2246 if ((ctxt->input != NULL) && (ctxt->input->filename))
2247 xmlGenericError(xmlGenericErrorContext,
2248 "%s(%d): ", ctxt->input->filename,
2249 ctxt->input->line);
2250 xmlGenericError(xmlGenericErrorContext,
2251 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2252 }
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02002253 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2254 (ctxt->inputNr > 1024)) {
2255 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2256 while (ctxt->inputNr > 1)
2257 xmlFreeInputStream(inputPop(ctxt));
2258 return(-1);
2259 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002260 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002261 if (ctxt->instate == XML_PARSER_EOF)
2262 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002263 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002264 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002265}
2266
2267/**
2268 * xmlParseCharRef:
2269 * @ctxt: an XML parser context
2270 *
2271 * parse Reference declarations
2272 *
2273 * [66] CharRef ::= '&#' [0-9]+ ';' |
2274 * '&#x' [0-9a-fA-F]+ ';'
2275 *
2276 * [ WFC: Legal Character ]
2277 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002278 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002279 *
2280 * Returns the value parsed (as an int), 0 in case of error
2281 */
2282int
2283xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002284 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002285 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002286 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002287
Owen Taylor3473f882001-02-23 17:55:21 +00002288 /*
2289 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2290 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002291 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002292 (NXT(2) == 'x')) {
2293 SKIP(3);
2294 GROW;
2295 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002296 if (count++ > 20) {
2297 count = 0;
2298 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002299 if (ctxt->instate == XML_PARSER_EOF)
2300 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002301 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002302 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002303 val = val * 16 + (CUR - '0');
2304 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2305 val = val * 16 + (CUR - 'a') + 10;
2306 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2307 val = val * 16 + (CUR - 'A') + 10;
2308 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002309 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002310 val = 0;
2311 break;
2312 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002313 if (val > 0x10FFFF)
2314 outofrange = val;
2315
Owen Taylor3473f882001-02-23 17:55:21 +00002316 NEXT;
2317 count++;
2318 }
2319 if (RAW == ';') {
2320 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002321 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002322 ctxt->nbChars ++;
2323 ctxt->input->cur++;
2324 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002325 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002326 SKIP(2);
2327 GROW;
2328 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002329 if (count++ > 20) {
2330 count = 0;
2331 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002332 if (ctxt->instate == XML_PARSER_EOF)
2333 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002334 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002335 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002336 val = val * 10 + (CUR - '0');
2337 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002338 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002339 val = 0;
2340 break;
2341 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002342 if (val > 0x10FFFF)
2343 outofrange = val;
2344
Owen Taylor3473f882001-02-23 17:55:21 +00002345 NEXT;
2346 count++;
2347 }
2348 if (RAW == ';') {
2349 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002350 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002351 ctxt->nbChars ++;
2352 ctxt->input->cur++;
2353 }
2354 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002355 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002356 }
2357
2358 /*
2359 * [ WFC: Legal Character ]
2360 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002361 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002362 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002363 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002364 return(val);
2365 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002366 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2367 "xmlParseCharRef: invalid xmlChar value %d\n",
2368 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002369 }
2370 return(0);
2371}
2372
2373/**
2374 * xmlParseStringCharRef:
2375 * @ctxt: an XML parser context
2376 * @str: a pointer to an index in the string
2377 *
2378 * parse Reference declarations, variant parsing from a string rather
2379 * than an an input flow.
2380 *
2381 * [66] CharRef ::= '&#' [0-9]+ ';' |
2382 * '&#x' [0-9a-fA-F]+ ';'
2383 *
2384 * [ WFC: Legal Character ]
2385 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002386 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002387 *
2388 * Returns the value parsed (as an int), 0 in case of error, str will be
2389 * updated to the current value of the index
2390 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002391static int
Owen Taylor3473f882001-02-23 17:55:21 +00002392xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2393 const xmlChar *ptr;
2394 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002395 unsigned int val = 0;
2396 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002397
2398 if ((str == NULL) || (*str == NULL)) return(0);
2399 ptr = *str;
2400 cur = *ptr;
2401 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2402 ptr += 3;
2403 cur = *ptr;
2404 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002405 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002406 val = val * 16 + (cur - '0');
2407 else if ((cur >= 'a') && (cur <= 'f'))
2408 val = val * 16 + (cur - 'a') + 10;
2409 else if ((cur >= 'A') && (cur <= 'F'))
2410 val = val * 16 + (cur - 'A') + 10;
2411 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002412 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002413 val = 0;
2414 break;
2415 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002416 if (val > 0x10FFFF)
2417 outofrange = val;
2418
Owen Taylor3473f882001-02-23 17:55:21 +00002419 ptr++;
2420 cur = *ptr;
2421 }
2422 if (cur == ';')
2423 ptr++;
2424 } else if ((cur == '&') && (ptr[1] == '#')){
2425 ptr += 2;
2426 cur = *ptr;
2427 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002428 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002429 val = val * 10 + (cur - '0');
2430 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002431 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002432 val = 0;
2433 break;
2434 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002435 if (val > 0x10FFFF)
2436 outofrange = val;
2437
Owen Taylor3473f882001-02-23 17:55:21 +00002438 ptr++;
2439 cur = *ptr;
2440 }
2441 if (cur == ';')
2442 ptr++;
2443 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002444 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002445 return(0);
2446 }
2447 *str = ptr;
2448
2449 /*
2450 * [ WFC: Legal Character ]
2451 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002452 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002453 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002454 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002455 return(val);
2456 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002457 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2458 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2459 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002460 }
2461 return(0);
2462}
2463
2464/**
2465 * xmlParserHandlePEReference:
2466 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002467 *
Owen Taylor3473f882001-02-23 17:55:21 +00002468 * [69] PEReference ::= '%' Name ';'
2469 *
2470 * [ WFC: No Recursion ]
2471 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002472 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002473 *
2474 * [ WFC: Entity Declared ]
2475 * In a document without any DTD, a document with only an internal DTD
2476 * subset which contains no parameter entity references, or a document
2477 * with "standalone='yes'", ... ... The declaration of a parameter
2478 * entity must precede any reference to it...
2479 *
2480 * [ VC: Entity Declared ]
2481 * In a document with an external subset or external parameter entities
2482 * with "standalone='no'", ... ... The declaration of a parameter entity
2483 * must precede any reference to it...
2484 *
2485 * [ WFC: In DTD ]
2486 * Parameter-entity references may only appear in the DTD.
2487 * NOTE: misleading but this is handled.
2488 *
2489 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002490 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002491 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002492 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002493 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002494 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002495 */
2496void
2497xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00002498 switch(ctxt->instate) {
2499 case XML_PARSER_CDATA_SECTION:
2500 return;
2501 case XML_PARSER_COMMENT:
2502 return;
2503 case XML_PARSER_START_TAG:
2504 return;
2505 case XML_PARSER_END_TAG:
2506 return;
2507 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002508 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002509 return;
2510 case XML_PARSER_PROLOG:
2511 case XML_PARSER_START:
2512 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002513 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002514 return;
2515 case XML_PARSER_ENTITY_DECL:
2516 case XML_PARSER_CONTENT:
2517 case XML_PARSER_ATTRIBUTE_VALUE:
2518 case XML_PARSER_PI:
2519 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002520 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002521 /* we just ignore it there */
2522 return;
2523 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002524 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002525 return;
2526 case XML_PARSER_ENTITY_VALUE:
2527 /*
2528 * NOTE: in the case of entity values, we don't do the
2529 * substitution here since we need the literal
2530 * entity value to be able to save the internal
2531 * subset of the document.
2532 * This will be handled by xmlStringDecodeEntities
2533 */
2534 return;
2535 case XML_PARSER_DTD:
2536 /*
2537 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2538 * In the internal DTD subset, parameter-entity references
2539 * can occur only where markup declarations can occur, not
2540 * within markup declarations.
2541 * In that case this is handled in xmlParseMarkupDecl
2542 */
2543 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2544 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002545 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002546 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002547 break;
2548 case XML_PARSER_IGNORE:
2549 return;
2550 }
2551
Nick Wellnhofer03904152017-06-05 21:16:00 +02002552 xmlParsePEReference(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00002553}
2554
2555/*
2556 * Macro used to grow the current buffer.
Daniel Veillard459eeb92012-07-17 16:19:17 +08002557 * buffer##_size is expected to be a size_t
2558 * mem_error: is expected to handle memory allocation failures
Owen Taylor3473f882001-02-23 17:55:21 +00002559 */
Daniel Veillard0161e632008-08-28 15:36:32 +00002560#define growBuffer(buffer, n) { \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002561 xmlChar *tmp; \
Daniel Veillard459eeb92012-07-17 16:19:17 +08002562 size_t new_size = buffer##_size * 2 + n; \
2563 if (new_size < buffer##_size) goto mem_error; \
2564 tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
Daniel Veillardd3999c72004-03-10 16:27:03 +00002565 if (tmp == NULL) goto mem_error; \
2566 buffer = tmp; \
Daniel Veillard459eeb92012-07-17 16:19:17 +08002567 buffer##_size = new_size; \
Owen Taylor3473f882001-02-23 17:55:21 +00002568}
2569
2570/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002571 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002572 * @ctxt: the parser context
2573 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002574 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002575 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2576 * @end: an end marker xmlChar, 0 if none
2577 * @end2: an end marker xmlChar, 0 if none
2578 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002579 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002580 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002581 *
2582 * [67] Reference ::= EntityRef | CharRef
2583 *
2584 * [69] PEReference ::= '%' Name ';'
2585 *
2586 * Returns A newly allocated string with the substitution done. The caller
2587 * must deallocate it !
2588 */
2589xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002590xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2591 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002592 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002593 size_t buffer_size = 0;
2594 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002595
2596 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002597 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002598 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002599 xmlEntityPtr ent;
2600 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002601
Daniel Veillarda82b1822004-11-08 16:24:57 +00002602 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002603 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002604 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002605
Daniel Veillard0161e632008-08-28 15:36:32 +00002606 if (((ctxt->depth > 40) &&
2607 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2608 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002609 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002610 return(NULL);
2611 }
2612
2613 /*
2614 * allocate a translation buffer.
2615 */
2616 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002617 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002618 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002619
2620 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002621 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002622 * we are operating on already parsed values.
2623 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002624 if (str < last)
2625 c = CUR_SCHAR(str, l);
2626 else
2627 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002628 while ((c != 0) && (c != end) && /* non input consuming loop */
2629 (c != end2) && (c != end3)) {
2630
2631 if (c == 0) break;
2632 if ((c == '&') && (str[1] == '#')) {
2633 int val = xmlParseStringCharRef(ctxt, &str);
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002634 if (val == 0)
2635 goto int_error;
2636 COPY_BUF(0,buffer,nbchars,val);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002637 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002638 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002639 }
Owen Taylor3473f882001-02-23 17:55:21 +00002640 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2641 if (xmlParserDebugEntities)
2642 xmlGenericError(xmlGenericErrorContext,
2643 "String decoding Entity Reference: %.30s\n",
2644 str);
2645 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002646 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002647 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002648 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002649 if ((ent != NULL) &&
2650 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2651 if (ent->content != NULL) {
2652 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002653 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002654 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002655 }
Owen Taylor3473f882001-02-23 17:55:21 +00002656 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002657 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2658 "predefined entity has no content\n");
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002659 goto int_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002660 }
2661 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002662 ctxt->depth++;
2663 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2664 0, 0, 0);
2665 ctxt->depth--;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002666 if (rep == NULL)
2667 goto int_error;
Daniel Veillard0161e632008-08-28 15:36:32 +00002668
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002669 current = rep;
2670 while (*current != 0) { /* non input consuming loop */
2671 buffer[nbchars++] = *current++;
2672 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2673 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2674 goto int_error;
2675 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2676 }
2677 }
2678 xmlFree(rep);
2679 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002680 } else if (ent != NULL) {
2681 int i = xmlStrlen(ent->name);
2682 const xmlChar *cur = ent->name;
2683
2684 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002685 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002686 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002687 }
2688 for (;i > 0;i--)
2689 buffer[nbchars++] = *cur++;
2690 buffer[nbchars++] = ';';
2691 }
2692 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2693 if (xmlParserDebugEntities)
2694 xmlGenericError(xmlGenericErrorContext,
2695 "String decoding PE Reference: %.30s\n", str);
2696 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002697 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002698 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002699 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002700 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002701 if (ent->content == NULL) {
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002702 /*
2703 * Note: external parsed entities will not be loaded,
2704 * it is not required for a non-validating parser to
2705 * complete external PEreferences coming from the
2706 * internal subset
2707 */
2708 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2709 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2710 (ctxt->validate != 0)) {
2711 xmlLoadEntityContent(ctxt, ent);
2712 } else {
2713 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2714 "not validating will not read content for PE entity %s\n",
2715 ent->name, NULL);
2716 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002717 }
Owen Taylor3473f882001-02-23 17:55:21 +00002718 ctxt->depth++;
2719 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2720 0, 0, 0);
2721 ctxt->depth--;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002722 if (rep == NULL)
2723 goto int_error;
2724 current = rep;
2725 while (*current != 0) { /* non input consuming loop */
2726 buffer[nbchars++] = *current++;
2727 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2728 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2729 goto int_error;
2730 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2731 }
2732 }
2733 xmlFree(rep);
2734 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002735 }
2736 } else {
2737 COPY_BUF(l,buffer,nbchars,c);
2738 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002739 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2740 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002741 }
2742 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002743 if (str < last)
2744 c = CUR_SCHAR(str, l);
2745 else
2746 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002747 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002748 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002749 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002750
2751mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002752 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002753int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002754 if (rep != NULL)
2755 xmlFree(rep);
2756 if (buffer != NULL)
2757 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002758 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002759}
2760
Daniel Veillarde57ec792003-09-10 10:50:59 +00002761/**
2762 * xmlStringDecodeEntities:
2763 * @ctxt: the parser context
2764 * @str: the input string
2765 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2766 * @end: an end marker xmlChar, 0 if none
2767 * @end2: an end marker xmlChar, 0 if none
2768 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002769 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002770 * Takes a entity string content and process to do the adequate substitutions.
2771 *
2772 * [67] Reference ::= EntityRef | CharRef
2773 *
2774 * [69] PEReference ::= '%' Name ';'
2775 *
2776 * Returns A newly allocated string with the substitution done. The caller
2777 * must deallocate it !
2778 */
2779xmlChar *
2780xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2781 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002782 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002783 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2784 end, end2, end3));
2785}
Owen Taylor3473f882001-02-23 17:55:21 +00002786
2787/************************************************************************
2788 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002789 * Commodity functions, cleanup needed ? *
2790 * *
2791 ************************************************************************/
2792
2793/**
2794 * areBlanks:
2795 * @ctxt: an XML parser context
2796 * @str: a xmlChar *
2797 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002798 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002799 *
2800 * Is this a sequence of blank chars that one can ignore ?
2801 *
2802 * Returns 1 if ignorable 0 otherwise.
2803 */
2804
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002805static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2806 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002807 int i, ret;
2808 xmlNodePtr lastChild;
2809
Daniel Veillard05c13a22001-09-09 08:38:09 +00002810 /*
2811 * Don't spend time trying to differentiate them, the same callback is
2812 * used !
2813 */
2814 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002815 return(0);
2816
Owen Taylor3473f882001-02-23 17:55:21 +00002817 /*
2818 * Check for xml:space value.
2819 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002820 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2821 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002822 return(0);
2823
2824 /*
2825 * Check that the string is made of blanks
2826 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002827 if (blank_chars == 0) {
2828 for (i = 0;i < len;i++)
2829 if (!(IS_BLANK_CH(str[i]))) return(0);
2830 }
Owen Taylor3473f882001-02-23 17:55:21 +00002831
2832 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002833 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002834 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002835 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002836 if (ctxt->myDoc != NULL) {
2837 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2838 if (ret == 0) return(1);
2839 if (ret == 1) return(0);
2840 }
2841
2842 /*
2843 * Otherwise, heuristic :-\
2844 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002845 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002846 if ((ctxt->node->children == NULL) &&
2847 (RAW == '<') && (NXT(1) == '/')) return(0);
2848
2849 lastChild = xmlGetLastChild(ctxt->node);
2850 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002851 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2852 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002853 } else if (xmlNodeIsText(lastChild))
2854 return(0);
2855 else if ((ctxt->node->children != NULL) &&
2856 (xmlNodeIsText(ctxt->node->children)))
2857 return(0);
2858 return(1);
2859}
2860
Owen Taylor3473f882001-02-23 17:55:21 +00002861/************************************************************************
2862 * *
2863 * Extra stuff for namespace support *
2864 * Relates to http://www.w3.org/TR/WD-xml-names *
2865 * *
2866 ************************************************************************/
2867
2868/**
2869 * xmlSplitQName:
2870 * @ctxt: an XML parser context
2871 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002872 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002873 *
2874 * parse an UTF8 encoded XML qualified name string
2875 *
2876 * [NS 5] QName ::= (Prefix ':')? LocalPart
2877 *
2878 * [NS 6] Prefix ::= NCName
2879 *
2880 * [NS 7] LocalPart ::= NCName
2881 *
2882 * Returns the local part, and prefix is updated
2883 * to get the Prefix if any.
2884 */
2885
2886xmlChar *
2887xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2888 xmlChar buf[XML_MAX_NAMELEN + 5];
2889 xmlChar *buffer = NULL;
2890 int len = 0;
2891 int max = XML_MAX_NAMELEN;
2892 xmlChar *ret = NULL;
2893 const xmlChar *cur = name;
2894 int c;
2895
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002896 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002897 *prefix = NULL;
2898
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002899 if (cur == NULL) return(NULL);
2900
Owen Taylor3473f882001-02-23 17:55:21 +00002901#ifndef XML_XML_NAMESPACE
2902 /* xml: prefix is not really a namespace */
2903 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2904 (cur[2] == 'l') && (cur[3] == ':'))
2905 return(xmlStrdup(name));
2906#endif
2907
Daniel Veillard597bc482003-07-24 16:08:28 +00002908 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002909 if (cur[0] == ':')
2910 return(xmlStrdup(name));
2911
2912 c = *cur++;
2913 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2914 buf[len++] = c;
2915 c = *cur++;
2916 }
2917 if (len >= max) {
2918 /*
2919 * Okay someone managed to make a huge name, so he's ready to pay
2920 * for the processing speed.
2921 */
2922 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002923
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002924 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002925 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002926 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002927 return(NULL);
2928 }
2929 memcpy(buffer, buf, len);
2930 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2931 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002932 xmlChar *tmp;
2933
Owen Taylor3473f882001-02-23 17:55:21 +00002934 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002935 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002936 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002937 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002938 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002939 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002940 return(NULL);
2941 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002942 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002943 }
2944 buffer[len++] = c;
2945 c = *cur++;
2946 }
2947 buffer[len] = 0;
2948 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002949
Daniel Veillard597bc482003-07-24 16:08:28 +00002950 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002951 if (buffer != NULL)
2952 xmlFree(buffer);
2953 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002954 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002955 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002956
Owen Taylor3473f882001-02-23 17:55:21 +00002957 if (buffer == NULL)
2958 ret = xmlStrndup(buf, len);
2959 else {
2960 ret = buffer;
2961 buffer = NULL;
2962 max = XML_MAX_NAMELEN;
2963 }
2964
2965
2966 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002967 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002968 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002969 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002970 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002971 }
Owen Taylor3473f882001-02-23 17:55:21 +00002972 len = 0;
2973
Daniel Veillardbb284f42002-10-16 18:02:47 +00002974 /*
2975 * Check that the first character is proper to start
2976 * a new name
2977 */
2978 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2979 ((c >= 0x41) && (c <= 0x5A)) ||
2980 (c == '_') || (c == ':'))) {
2981 int l;
2982 int first = CUR_SCHAR(cur, l);
2983
2984 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002985 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002986 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002987 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002988 }
2989 }
2990 cur++;
2991
Owen Taylor3473f882001-02-23 17:55:21 +00002992 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2993 buf[len++] = c;
2994 c = *cur++;
2995 }
2996 if (len >= max) {
2997 /*
2998 * Okay someone managed to make a huge name, so he's ready to pay
2999 * for the processing speed.
3000 */
3001 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003002
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003003 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003004 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003005 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003006 return(NULL);
3007 }
3008 memcpy(buffer, buf, len);
3009 while (c != 0) { /* tested bigname2.xml */
3010 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003011 xmlChar *tmp;
3012
Owen Taylor3473f882001-02-23 17:55:21 +00003013 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003014 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003015 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003016 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003017 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003018 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003019 return(NULL);
3020 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003021 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003022 }
3023 buffer[len++] = c;
3024 c = *cur++;
3025 }
3026 buffer[len] = 0;
3027 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003028
Owen Taylor3473f882001-02-23 17:55:21 +00003029 if (buffer == NULL)
3030 ret = xmlStrndup(buf, len);
3031 else {
3032 ret = buffer;
3033 }
3034 }
3035
3036 return(ret);
3037}
3038
3039/************************************************************************
3040 * *
3041 * The parser itself *
3042 * Relates to http://www.w3.org/TR/REC-xml *
3043 * *
3044 ************************************************************************/
3045
Daniel Veillard34e3f642008-07-29 09:02:27 +00003046/************************************************************************
3047 * *
3048 * Routines to parse Name, NCName and NmToken *
3049 * *
3050 ************************************************************************/
/*
 * Call counters for the name parsing routines below, only compiled in
 * when DEBUG is defined. Each routine increments its own counter on entry.
 */
Daniel Veillardc6561462009-03-25 10:22:31 +00003051#ifdef DEBUG
3052static unsigned long nbParseName = 0;
3053static unsigned long nbParseNmToken = 0;
3054static unsigned long nbParseNCName = 0;
3055static unsigned long nbParseNCNameComplex = 0;
3056static unsigned long nbParseNameComplex = 0;
3057static unsigned long nbParseStringName = 0;
3058#endif
3059
Daniel Veillard34e3f642008-07-29 09:02:27 +00003060/*
3061 * The two following functions are related to the change of accepted
3062 * characters for Name and NmToken in the Revision 5 of XML-1.0
3063 * They correspond to the modified production [4] and the new production [4a]
3064 * changes in that revision. Also note that the macros used for the
3065 * productions Letter, Digit, CombiningChar and Extender are not needed
3066 * anymore.
3067 * We still keep compatibility to pre-revision5 parsing semantic if the
3068 * new XML_PARSE_OLD10 option is given to the parser.
3069 */
3070static int
3071xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3072 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3073 /*
3074 * Use the new checks of production [4] [4a] amd [5] of the
3075 * Update 5 of XML-1.0
3076 */
3077 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3078 (((c >= 'a') && (c <= 'z')) ||
3079 ((c >= 'A') && (c <= 'Z')) ||
3080 (c == '_') || (c == ':') ||
3081 ((c >= 0xC0) && (c <= 0xD6)) ||
3082 ((c >= 0xD8) && (c <= 0xF6)) ||
3083 ((c >= 0xF8) && (c <= 0x2FF)) ||
3084 ((c >= 0x370) && (c <= 0x37D)) ||
3085 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3086 ((c >= 0x200C) && (c <= 0x200D)) ||
3087 ((c >= 0x2070) && (c <= 0x218F)) ||
3088 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3089 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3090 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3091 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3092 ((c >= 0x10000) && (c <= 0xEFFFF))))
3093 return(1);
3094 } else {
3095 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3096 return(1);
3097 }
3098 return(0);
3099}
3100
3101static int
3102xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3103 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3104 /*
3105 * Use the new checks of production [4] [4a] amd [5] of the
3106 * Update 5 of XML-1.0
3107 */
3108 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3109 (((c >= 'a') && (c <= 'z')) ||
3110 ((c >= 'A') && (c <= 'Z')) ||
3111 ((c >= '0') && (c <= '9')) || /* !start */
3112 (c == '_') || (c == ':') ||
3113 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3114 ((c >= 0xC0) && (c <= 0xD6)) ||
3115 ((c >= 0xD8) && (c <= 0xF6)) ||
3116 ((c >= 0xF8) && (c <= 0x2FF)) ||
3117 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3118 ((c >= 0x370) && (c <= 0x37D)) ||
3119 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3120 ((c >= 0x200C) && (c <= 0x200D)) ||
3121 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3122 ((c >= 0x2070) && (c <= 0x218F)) ||
3123 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3124 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3125 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3126 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3127 ((c >= 0x10000) && (c <= 0xEFFFF))))
3128 return(1);
3129 } else {
3130 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3131 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003132 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003133 (IS_COMBINING(c)) ||
3134 (IS_EXTENDER(c)))
3135 return(1);
3136 }
3137 return(0);
3138}
3139
Daniel Veillarde57ec792003-09-10 10:50:59 +00003140static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003141 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003142
Daniel Veillard34e3f642008-07-29 09:02:27 +00003143static const xmlChar *
3144xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3145 int len = 0, l;
3146 int c;
3147 int count = 0;
3148
Daniel Veillardc6561462009-03-25 10:22:31 +00003149#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003150 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003151#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003152
3153 /*
3154 * Handler for more complex cases
3155 */
3156 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003157 if (ctxt->instate == XML_PARSER_EOF)
3158 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003159 c = CUR_CHAR(l);
3160 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3161 /*
3162 * Use the new checks of production [4] [4a] amd [5] of the
3163 * Update 5 of XML-1.0
3164 */
3165 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3166 (!(((c >= 'a') && (c <= 'z')) ||
3167 ((c >= 'A') && (c <= 'Z')) ||
3168 (c == '_') || (c == ':') ||
3169 ((c >= 0xC0) && (c <= 0xD6)) ||
3170 ((c >= 0xD8) && (c <= 0xF6)) ||
3171 ((c >= 0xF8) && (c <= 0x2FF)) ||
3172 ((c >= 0x370) && (c <= 0x37D)) ||
3173 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3174 ((c >= 0x200C) && (c <= 0x200D)) ||
3175 ((c >= 0x2070) && (c <= 0x218F)) ||
3176 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3177 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3178 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3179 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3180 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3181 return(NULL);
3182 }
3183 len += l;
3184 NEXTL(l);
3185 c = CUR_CHAR(l);
3186 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3187 (((c >= 'a') && (c <= 'z')) ||
3188 ((c >= 'A') && (c <= 'Z')) ||
3189 ((c >= '0') && (c <= '9')) || /* !start */
3190 (c == '_') || (c == ':') ||
3191 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3192 ((c >= 0xC0) && (c <= 0xD6)) ||
3193 ((c >= 0xD8) && (c <= 0xF6)) ||
3194 ((c >= 0xF8) && (c <= 0x2FF)) ||
3195 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3196 ((c >= 0x370) && (c <= 0x37D)) ||
3197 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3198 ((c >= 0x200C) && (c <= 0x200D)) ||
3199 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3200 ((c >= 0x2070) && (c <= 0x218F)) ||
3201 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3202 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3203 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3204 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3205 ((c >= 0x10000) && (c <= 0xEFFFF))
3206 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003207 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003208 count = 0;
3209 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003210 if (ctxt->instate == XML_PARSER_EOF)
3211 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003212 }
3213 len += l;
3214 NEXTL(l);
3215 c = CUR_CHAR(l);
3216 }
3217 } else {
3218 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3219 (!IS_LETTER(c) && (c != '_') &&
3220 (c != ':'))) {
3221 return(NULL);
3222 }
3223 len += l;
3224 NEXTL(l);
3225 c = CUR_CHAR(l);
3226
3227 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3228 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3229 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003230 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003231 (IS_COMBINING(c)) ||
3232 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003233 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003234 count = 0;
3235 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003236 if (ctxt->instate == XML_PARSER_EOF)
3237 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003238 }
3239 len += l;
3240 NEXTL(l);
3241 c = CUR_CHAR(l);
3242 }
3243 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003244 if ((len > XML_MAX_NAME_LENGTH) &&
3245 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3246 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3247 return(NULL);
3248 }
Nick Wellnhofere2663052017-06-05 15:37:17 +02003249 if (ctxt->input->cur - ctxt->input->base < len) {
3250 /*
3251 * There were a couple of bugs where PERefs lead to to a change
3252 * of the buffer. Check the buffer size to avoid passing an invalid
3253 * pointer to xmlDictLookup.
3254 */
3255 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3256 "unexpected change of input buffer");
3257 return (NULL);
3258 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003259 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3260 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3261 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3262}
3263
Owen Taylor3473f882001-02-23 17:55:21 +00003264/**
3265 * xmlParseName:
3266 * @ctxt: an XML parser context
3267 *
3268 * parse an XML name.
3269 *
3270 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3271 * CombiningChar | Extender
3272 *
3273 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3274 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003275 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003276 *
3277 * Returns the Name parsed or NULL
3278 */
3279
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003280const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003281xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003282 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003283 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003284 int count = 0;
3285
3286 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003287
Daniel Veillardc6561462009-03-25 10:22:31 +00003288#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003289 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003290#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003291
Daniel Veillard48b2f892001-02-25 16:11:03 +00003292 /*
3293 * Accelerator for simple ASCII names
3294 */
3295 in = ctxt->input->cur;
3296 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3297 ((*in >= 0x41) && (*in <= 0x5A)) ||
3298 (*in == '_') || (*in == ':')) {
3299 in++;
3300 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3301 ((*in >= 0x41) && (*in <= 0x5A)) ||
3302 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003303 (*in == '_') || (*in == '-') ||
3304 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003305 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003306 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003307 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003308 if ((count > XML_MAX_NAME_LENGTH) &&
3309 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3310 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3311 return(NULL);
3312 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003313 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003314 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003315 ctxt->nbChars += count;
3316 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003317 if (ret == NULL)
3318 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003319 return(ret);
3320 }
3321 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003322 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003323 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003324}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003325
Daniel Veillard34e3f642008-07-29 09:02:27 +00003326static const xmlChar *
3327xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3328 int len = 0, l;
3329 int c;
3330 int count = 0;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003331 size_t startPosition = 0;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003332
Daniel Veillardc6561462009-03-25 10:22:31 +00003333#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003334 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003335#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003336
3337 /*
3338 * Handler for more complex cases
3339 */
3340 GROW;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003341 startPosition = CUR_PTR - BASE_PTR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003342 c = CUR_CHAR(l);
3343 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3344 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3345 return(NULL);
3346 }
3347
3348 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3349 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003350 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003351 if ((len > XML_MAX_NAME_LENGTH) &&
3352 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3353 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3354 return(NULL);
3355 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003356 count = 0;
3357 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003358 if (ctxt->instate == XML_PARSER_EOF)
3359 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003360 }
3361 len += l;
3362 NEXTL(l);
3363 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003364 if (c == 0) {
3365 count = 0;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003366 /*
3367 * when shrinking to extend the buffer we really need to preserve
3368 * the part of the name we already parsed. Hence rolling back
3369 * by current lenght.
3370 */
3371 ctxt->input->cur -= l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003372 GROW;
3373 if (ctxt->instate == XML_PARSER_EOF)
3374 return(NULL);
Nick Wellnhofer132af1a2018-01-08 18:48:01 +01003375 ctxt->input->cur += l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003376 c = CUR_CHAR(l);
3377 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003378 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003379 if ((len > XML_MAX_NAME_LENGTH) &&
3380 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3381 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3382 return(NULL);
3383 }
Pranjal Jumde45752d22016-03-03 11:50:34 -08003384 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003385}
3386
3387/**
3388 * xmlParseNCName:
3389 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003390 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003391 *
3392 * parse an XML name.
3393 *
3394 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3395 * CombiningChar | Extender
3396 *
3397 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3398 *
3399 * Returns the Name parsed or NULL
3400 */
3401
3402static const xmlChar *
3403xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003404 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003405 const xmlChar *ret;
3406 int count = 0;
3407
Daniel Veillardc6561462009-03-25 10:22:31 +00003408#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003409 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003410#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003411
3412 /*
3413 * Accelerator for simple ASCII names
3414 */
3415 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003416 e = ctxt->input->end;
3417 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3418 ((*in >= 0x41) && (*in <= 0x5A)) ||
3419 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003420 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003421 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3422 ((*in >= 0x41) && (*in <= 0x5A)) ||
3423 ((*in >= 0x30) && (*in <= 0x39)) ||
3424 (*in == '_') || (*in == '-') ||
3425 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003426 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003427 if (in >= e)
3428 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003429 if ((*in > 0) && (*in < 0x80)) {
3430 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003431 if ((count > XML_MAX_NAME_LENGTH) &&
3432 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3433 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3434 return(NULL);
3435 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003436 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3437 ctxt->input->cur = in;
3438 ctxt->nbChars += count;
3439 ctxt->input->col += count;
3440 if (ret == NULL) {
3441 xmlErrMemory(ctxt, NULL);
3442 }
3443 return(ret);
3444 }
3445 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003446complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003447 return(xmlParseNCNameComplex(ctxt));
3448}
3449
Daniel Veillard46de64e2002-05-29 08:21:33 +00003450/**
3451 * xmlParseNameAndCompare:
3452 * @ctxt: an XML parser context
3453 *
3454 * parse an XML name and compares for match
3455 * (specialized for endtag parsing)
3456 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003457 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3458 * and the name for mismatch
3459 */
3460
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003461static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003462xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003463 register const xmlChar *cmp = other;
3464 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003465 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003466
3467 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003468 if (ctxt->instate == XML_PARSER_EOF)
3469 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003470
Daniel Veillard46de64e2002-05-29 08:21:33 +00003471 in = ctxt->input->cur;
3472 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003473 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003474 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003475 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003476 }
William M. Brack76e95df2003-10-18 16:20:14 +00003477 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003478 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003479 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003480 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003481 }
3482 /* failure (or end of input buffer), check with full function */
3483 ret = xmlParseName (ctxt);
Jan Pokornýbb654fe2016-04-13 16:56:07 +02003484 /* strings coming from the dictionary direct compare possible */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003485 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003486 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003487 }
3488 return ret;
3489}
3490
Owen Taylor3473f882001-02-23 17:55:21 +00003491/**
3492 * xmlParseStringName:
3493 * @ctxt: an XML parser context
3494 * @str: a pointer to the string pointer (IN/OUT)
3495 *
3496 * parse an XML name.
3497 *
3498 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3499 * CombiningChar | Extender
3500 *
3501 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3502 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003503 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003504 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003505 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003506 * is updated to the current location in the string.
3507 */
3508
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003509static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003510xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3511 xmlChar buf[XML_MAX_NAMELEN + 5];
3512 const xmlChar *cur = *str;
3513 int len = 0, l;
3514 int c;
3515
Daniel Veillardc6561462009-03-25 10:22:31 +00003516#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003517 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003518#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003519
Owen Taylor3473f882001-02-23 17:55:21 +00003520 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003521 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003522 return(NULL);
3523 }
3524
Daniel Veillard34e3f642008-07-29 09:02:27 +00003525 COPY_BUF(l,buf,len,c);
3526 cur += l;
3527 c = CUR_SCHAR(cur, l);
3528 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003529 COPY_BUF(l,buf,len,c);
3530 cur += l;
3531 c = CUR_SCHAR(cur, l);
3532 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3533 /*
3534 * Okay someone managed to make a huge name, so he's ready to pay
3535 * for the processing speed.
3536 */
3537 xmlChar *buffer;
3538 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003539
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003540 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003541 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003542 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003543 return(NULL);
3544 }
3545 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003546 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003547 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003548 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003549
3550 if ((len > XML_MAX_NAME_LENGTH) &&
3551 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3552 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3553 xmlFree(buffer);
3554 return(NULL);
3555 }
Owen Taylor3473f882001-02-23 17:55:21 +00003556 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003557 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003558 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003559 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003560 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003561 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003562 return(NULL);
3563 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003564 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003565 }
3566 COPY_BUF(l,buffer,len,c);
3567 cur += l;
3568 c = CUR_SCHAR(cur, l);
3569 }
3570 buffer[len] = 0;
3571 *str = cur;
3572 return(buffer);
3573 }
3574 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003575 if ((len > XML_MAX_NAME_LENGTH) &&
3576 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3577 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3578 return(NULL);
3579 }
Owen Taylor3473f882001-02-23 17:55:21 +00003580 *str = cur;
3581 return(xmlStrndup(buf, len));
3582}
3583
3584/**
3585 * xmlParseNmtoken:
3586 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003587 *
Owen Taylor3473f882001-02-23 17:55:21 +00003588 * parse an XML Nmtoken.
3589 *
3590 * [7] Nmtoken ::= (NameChar)+
3591 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003592 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003593 *
3594 * Returns the Nmtoken parsed or NULL
3595 */
3596
3597xmlChar *
3598xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3599 xmlChar buf[XML_MAX_NAMELEN + 5];
3600 int len = 0, l;
3601 int c;
3602 int count = 0;
3603
Daniel Veillardc6561462009-03-25 10:22:31 +00003604#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003605 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003606#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003607
Owen Taylor3473f882001-02-23 17:55:21 +00003608 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003609 if (ctxt->instate == XML_PARSER_EOF)
3610 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003611 c = CUR_CHAR(l);
3612
Daniel Veillard34e3f642008-07-29 09:02:27 +00003613 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003614 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003615 count = 0;
3616 GROW;
3617 }
3618 COPY_BUF(l,buf,len,c);
3619 NEXTL(l);
3620 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003621 if (c == 0) {
3622 count = 0;
3623 GROW;
3624 if (ctxt->instate == XML_PARSER_EOF)
3625 return(NULL);
3626 c = CUR_CHAR(l);
3627 }
Owen Taylor3473f882001-02-23 17:55:21 +00003628 if (len >= XML_MAX_NAMELEN) {
3629 /*
3630 * Okay someone managed to make a huge token, so he's ready to pay
3631 * for the processing speed.
3632 */
3633 xmlChar *buffer;
3634 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003635
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003636 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003637 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003638 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003639 return(NULL);
3640 }
3641 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003642 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003643 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003644 count = 0;
3645 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003646 if (ctxt->instate == XML_PARSER_EOF) {
3647 xmlFree(buffer);
3648 return(NULL);
3649 }
Owen Taylor3473f882001-02-23 17:55:21 +00003650 }
3651 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003652 xmlChar *tmp;
3653
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003654 if ((max > XML_MAX_NAME_LENGTH) &&
3655 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3656 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3657 xmlFree(buffer);
3658 return(NULL);
3659 }
Owen Taylor3473f882001-02-23 17:55:21 +00003660 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003661 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003662 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003663 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003664 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003665 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003666 return(NULL);
3667 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003668 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003669 }
3670 COPY_BUF(l,buffer,len,c);
3671 NEXTL(l);
3672 c = CUR_CHAR(l);
3673 }
3674 buffer[len] = 0;
3675 return(buffer);
3676 }
3677 }
3678 if (len == 0)
3679 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003680 if ((len > XML_MAX_NAME_LENGTH) &&
3681 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3682 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3683 return(NULL);
3684 }
Owen Taylor3473f882001-02-23 17:55:21 +00003685 return(xmlStrndup(buf, len));
3686}
3687
3688/**
3689 * xmlParseEntityValue:
3690 * @ctxt: an XML parser context
3691 * @orig: if non-NULL store a copy of the original entity value
3692 *
3693 * parse a value for ENTITY declarations
3694 *
3695 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3696 * "'" ([^%&'] | PEReference | Reference)* "'"
3697 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003698 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003699 */
3700
3701xmlChar *
3702xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3703 xmlChar *buf = NULL;
3704 int len = 0;
3705 int size = XML_PARSER_BUFFER_SIZE;
3706 int c, l;
3707 xmlChar stop;
3708 xmlChar *ret = NULL;
3709 const xmlChar *cur = NULL;
3710 xmlParserInputPtr input;
3711
3712 if (RAW == '"') stop = '"';
3713 else if (RAW == '\'') stop = '\'';
3714 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003715 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003716 return(NULL);
3717 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003718 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003719 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003720 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003721 return(NULL);
3722 }
3723
3724 /*
3725 * The content of the entity definition is copied in a buffer.
3726 */
3727
3728 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3729 input = ctxt->input;
3730 GROW;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003731 if (ctxt->instate == XML_PARSER_EOF)
3732 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003733 NEXT;
3734 c = CUR_CHAR(l);
3735 /*
3736 * NOTE: 4.4.5 Included in Literal
3737 * When a parameter entity reference appears in a literal entity
3738 * value, ... a single or double quote character in the replacement
3739 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003740 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003741 * In practice it means we stop the loop only when back at parsing
3742 * the initial entity and the quote is found
3743 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003744 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3745 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003746 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003747 xmlChar *tmp;
3748
Owen Taylor3473f882001-02-23 17:55:21 +00003749 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003750 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3751 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003752 xmlErrMemory(ctxt, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003753 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003754 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003755 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003756 }
3757 COPY_BUF(l,buf,len,c);
3758 NEXTL(l);
Owen Taylor3473f882001-02-23 17:55:21 +00003759
3760 GROW;
3761 c = CUR_CHAR(l);
3762 if (c == 0) {
3763 GROW;
3764 c = CUR_CHAR(l);
3765 }
3766 }
3767 buf[len] = 0;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003768 if (ctxt->instate == XML_PARSER_EOF)
3769 goto error;
3770 if (c != stop) {
3771 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3772 goto error;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003773 }
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003774 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00003775
3776 /*
3777 * Raise problem w.r.t. '&' and '%' being used in non-entities
3778 * reference constructs. Note Charref will be handled in
3779 * xmlStringDecodeEntities()
3780 */
3781 cur = buf;
3782 while (*cur != 0) { /* non input consuming */
3783 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3784 xmlChar *name;
3785 xmlChar tmp = *cur;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003786 int nameOk = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003787
3788 cur++;
3789 name = xmlParseStringName(ctxt, &cur);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003790 if (name != NULL) {
3791 nameOk = 1;
3792 xmlFree(name);
3793 }
3794 if ((nameOk == 0) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003795 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003796 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003797 tmp);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003798 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003799 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003800 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3801 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003802 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003803 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003804 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003805 if (*cur == 0)
3806 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003807 }
3808 cur++;
3809 }
3810
3811 /*
3812 * Then PEReference entities are substituted.
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003813 *
3814 * NOTE: 4.4.7 Bypassed
3815 * When a general entity reference appears in the EntityValue in
3816 * an entity declaration, it is bypassed and left as is.
3817 * so XML_SUBSTITUTE_REF is not set here.
Owen Taylor3473f882001-02-23 17:55:21 +00003818 */
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003819 ++ctxt->depth;
3820 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3821 0, 0, 0);
3822 --ctxt->depth;
3823 if (orig != NULL) {
3824 *orig = buf;
3825 buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003826 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003827
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003828error:
3829 if (buf != NULL)
3830 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003831 return(ret);
3832}
3833
3834/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003835 * xmlParseAttValueComplex:
3836 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003837 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003838 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003839 *
3840 * parse a value for an attribute, this is the fallback function
3841 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003842 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003843 *
3844 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3845 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003846static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003847xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003848 xmlChar limit = 0;
3849 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003850 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003851 size_t len = 0;
3852 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003853 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003854 xmlChar *current = NULL;
3855 xmlEntityPtr ent;
3856
Owen Taylor3473f882001-02-23 17:55:21 +00003857 if (NXT(0) == '"') {
3858 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3859 limit = '"';
3860 NEXT;
3861 } else if (NXT(0) == '\'') {
3862 limit = '\'';
3863 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3864 NEXT;
3865 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003866 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003867 return(NULL);
3868 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003869
Owen Taylor3473f882001-02-23 17:55:21 +00003870 /*
3871 * allocate a translation buffer.
3872 */
3873 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003874 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003875 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003876
3877 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003878 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003879 */
3880 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003881 while (((NXT(0) != limit) && /* checked */
3882 (IS_CHAR(c)) && (c != '<')) &&
3883 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003884 /*
3885 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3886 * special option is given
3887 */
3888 if ((len > XML_MAX_TEXT_LENGTH) &&
3889 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3890 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003891 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003892 goto mem_error;
3893 }
Owen Taylor3473f882001-02-23 17:55:21 +00003894 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003895 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003896 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003897 if (NXT(1) == '#') {
3898 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003899
Owen Taylor3473f882001-02-23 17:55:21 +00003900 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003901 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003902 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003903 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003904 }
3905 buf[len++] = '&';
3906 } else {
3907 /*
3908 * The reparsing will be done in xmlStringGetNodeList()
3909 * called by the attribute() function in SAX.c
3910 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003911 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003912 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003913 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003914 buf[len++] = '&';
3915 buf[len++] = '#';
3916 buf[len++] = '3';
3917 buf[len++] = '8';
3918 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003919 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003920 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003921 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003922 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003923 }
Owen Taylor3473f882001-02-23 17:55:21 +00003924 len += xmlCopyChar(0, &buf[len], val);
3925 }
3926 } else {
3927 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003928 ctxt->nbentities++;
3929 if (ent != NULL)
3930 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003931 if ((ent != NULL) &&
3932 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003933 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003934 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003935 }
3936 if ((ctxt->replaceEntities == 0) &&
3937 (ent->content[0] == '&')) {
3938 buf[len++] = '&';
3939 buf[len++] = '#';
3940 buf[len++] = '3';
3941 buf[len++] = '8';
3942 buf[len++] = ';';
3943 } else {
3944 buf[len++] = ent->content[0];
3945 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003946 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003947 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003948 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
Peter Simons8f30bdf2016-04-15 11:56:55 +02003949 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003950 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003951 XML_SUBSTITUTE_REF,
3952 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02003953 --ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003954 if (rep != NULL) {
3955 current = rep;
3956 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003957 if ((*current == 0xD) || (*current == 0xA) ||
3958 (*current == 0x9)) {
3959 buf[len++] = 0x20;
3960 current++;
3961 } else
3962 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003963 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003964 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003965 }
3966 }
3967 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003968 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003969 }
3970 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003971 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003972 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003973 }
Owen Taylor3473f882001-02-23 17:55:21 +00003974 if (ent->content != NULL)
3975 buf[len++] = ent->content[0];
3976 }
3977 } else if (ent != NULL) {
3978 int i = xmlStrlen(ent->name);
3979 const xmlChar *cur = ent->name;
3980
3981 /*
3982 * This may look absurd but is needed to detect
3983 * entities problems
3984 */
3985 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08003986 (ent->content != NULL) && (ent->checked == 0)) {
3987 unsigned long oldnbent = ctxt->nbentities;
3988
Peter Simons8f30bdf2016-04-15 11:56:55 +02003989 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003990 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003991 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02003992 --ctxt->depth;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08003993
Daniel Veillardcff25462013-03-11 15:57:55 +08003994 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003995 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08003996 if (xmlStrchr(rep, '<'))
3997 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003998 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003999 rep = NULL;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02004000 } else {
4001 ent->content[0] = 0;
4002 }
Owen Taylor3473f882001-02-23 17:55:21 +00004003 }
4004
4005 /*
4006 * Just output the reference
4007 */
4008 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004009 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004010 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004011 }
4012 for (;i > 0;i--)
4013 buf[len++] = *cur++;
4014 buf[len++] = ';';
4015 }
4016 }
4017 } else {
4018 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004019 if ((len != 0) || (!normalize)) {
4020 if ((!normalize) || (!in_space)) {
4021 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004022 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004023 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004024 }
4025 }
4026 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004027 }
4028 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004029 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004030 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004031 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004032 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004033 }
4034 }
4035 NEXTL(l);
4036 }
4037 GROW;
4038 c = CUR_CHAR(l);
4039 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004040 if (ctxt->instate == XML_PARSER_EOF)
4041 goto error;
4042
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004043 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004044 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004045 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004046 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004047 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004048 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004049 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004050 if ((c != 0) && (!IS_CHAR(c))) {
4051 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4052 "invalid character in attribute value\n");
4053 } else {
4054 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4055 "AttValue: ' expected\n");
4056 }
Owen Taylor3473f882001-02-23 17:55:21 +00004057 } else
4058 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004059
4060 /*
4061 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004062 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004063 */
4064 if (len >= INT_MAX) {
4065 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004066 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004067 goto mem_error;
4068 }
4069
4070 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004071 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004072
4073mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004074 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004075error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004076 if (buf != NULL)
4077 xmlFree(buf);
4078 if (rep != NULL)
4079 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004080 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004081}
4082
4083/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004084 * xmlParseAttValue:
4085 * @ctxt: an XML parser context
4086 *
4087 * parse a value for an attribute
4088 * Note: the parser won't do substitution of entities here, this
4089 * will be handled later in xmlStringGetNodeList
4090 *
4091 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4092 * "'" ([^<&'] | Reference)* "'"
4093 *
4094 * 3.3.3 Attribute-Value Normalization:
4095 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004096 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004097 * - a character reference is processed by appending the referenced
4098 * character to the attribute value
4099 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004100 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004101 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4102 * appending #x20 to the normalized value, except that only a single
4103 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004104 * parsed entity or the literal entity value of an internal parsed entity
4105 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004106 * If the declared value is not CDATA, then the XML processor must further
4107 * process the normalized attribute value by discarding any leading and
4108 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004109 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004110 * All attributes for which no declaration has been read should be treated
4111 * by a non-validating parser as if declared CDATA.
4112 *
4113 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4114 */
4115
4116
4117xmlChar *
4118xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004119 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004120 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004121}
4122
4123/**
Owen Taylor3473f882001-02-23 17:55:21 +00004124 * xmlParseSystemLiteral:
4125 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004126 *
Owen Taylor3473f882001-02-23 17:55:21 +00004127 * parse an XML Literal
4128 *
4129 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4130 *
4131 * Returns the SystemLiteral parsed or NULL
4132 */
4133
4134xmlChar *
4135xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4136 xmlChar *buf = NULL;
4137 int len = 0;
4138 int size = XML_PARSER_BUFFER_SIZE;
4139 int cur, l;
4140 xmlChar stop;
4141 int state = ctxt->instate;
4142 int count = 0;
4143
4144 SHRINK;
4145 if (RAW == '"') {
4146 NEXT;
4147 stop = '"';
4148 } else if (RAW == '\'') {
4149 NEXT;
4150 stop = '\'';
4151 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004152 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004153 return(NULL);
4154 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004155
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004156 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004157 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004158 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004159 return(NULL);
4160 }
4161 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4162 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004163 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004164 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004165 xmlChar *tmp;
4166
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004167 if ((size > XML_MAX_NAME_LENGTH) &&
4168 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4169 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4170 xmlFree(buf);
4171 ctxt->instate = (xmlParserInputState) state;
4172 return(NULL);
4173 }
Owen Taylor3473f882001-02-23 17:55:21 +00004174 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004175 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4176 if (tmp == NULL) {
4177 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004178 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004179 ctxt->instate = (xmlParserInputState) state;
4180 return(NULL);
4181 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004182 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004183 }
4184 count++;
4185 if (count > 50) {
4186 GROW;
4187 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004188 if (ctxt->instate == XML_PARSER_EOF) {
4189 xmlFree(buf);
4190 return(NULL);
4191 }
Owen Taylor3473f882001-02-23 17:55:21 +00004192 }
4193 COPY_BUF(l,buf,len,cur);
4194 NEXTL(l);
4195 cur = CUR_CHAR(l);
4196 if (cur == 0) {
4197 GROW;
4198 SHRINK;
4199 cur = CUR_CHAR(l);
4200 }
4201 }
4202 buf[len] = 0;
4203 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004204 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004205 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004206 } else {
4207 NEXT;
4208 }
4209 return(buf);
4210}
4211
4212/**
4213 * xmlParsePubidLiteral:
4214 * @ctxt: an XML parser context
4215 *
4216 * parse an XML public literal
4217 *
4218 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4219 *
4220 * Returns the PubidLiteral parsed or NULL.
4221 */
4222
4223xmlChar *
4224xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4225 xmlChar *buf = NULL;
4226 int len = 0;
4227 int size = XML_PARSER_BUFFER_SIZE;
4228 xmlChar cur;
4229 xmlChar stop;
4230 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004231 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004232
4233 SHRINK;
4234 if (RAW == '"') {
4235 NEXT;
4236 stop = '"';
4237 } else if (RAW == '\'') {
4238 NEXT;
4239 stop = '\'';
4240 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004241 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004242 return(NULL);
4243 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004244 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004245 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004246 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004247 return(NULL);
4248 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004249 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004250 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004251 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004252 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004253 xmlChar *tmp;
4254
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004255 if ((size > XML_MAX_NAME_LENGTH) &&
4256 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4257 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4258 xmlFree(buf);
4259 return(NULL);
4260 }
Owen Taylor3473f882001-02-23 17:55:21 +00004261 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004262 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4263 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004264 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004265 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004266 return(NULL);
4267 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004268 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004269 }
4270 buf[len++] = cur;
4271 count++;
4272 if (count > 50) {
4273 GROW;
4274 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004275 if (ctxt->instate == XML_PARSER_EOF) {
4276 xmlFree(buf);
4277 return(NULL);
4278 }
Owen Taylor3473f882001-02-23 17:55:21 +00004279 }
4280 NEXT;
4281 cur = CUR;
4282 if (cur == 0) {
4283 GROW;
4284 SHRINK;
4285 cur = CUR;
4286 }
4287 }
4288 buf[len] = 0;
4289 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004290 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004291 } else {
4292 NEXT;
4293 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004294 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004295 return(buf);
4296}
4297
Daniel Veillard8ed10722009-08-20 19:17:36 +02004298static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004299
/*
 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed by a byte value.  An entry is either that same
 * byte value or 0x00.  The entries set to 0x00 are: every byte below
 * 0x20 except 0x09, the characters '&' (0x26), '<' (0x3C) and ']' (0x5D),
 * and every byte from 0x80 up.
 * NOTE(review): presumably a 0x00 entry is what ends the fast scanning
 * loop in the char data parser; the use site is not part of this excerpt.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4337
Owen Taylor3473f882001-02-23 17:55:21 +00004338/**
4339 * xmlParseCharData:
4340 * @ctxt: an XML parser context
4341 * @cdata: int indicating whether we are within a CDATA section
4342 *
4343 * parse a CharData section.
4344 * if we are within a CDATA section ']]>' marks an end of section.
4345 *
4346 * The right angle bracket (>) may be represented using the string "&gt;",
4347 * and must, for compatibility, be escaped using "&gt;" or a character
4348 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004349 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004350 *
4351 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4352 */
4353
4354void
4355xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004356 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004357 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004358 int line = ctxt->input->line;
4359 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004360 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004361
4362 SHRINK;
4363 GROW;
4364 /*
4365 * Accelerated common case where input don't need to be
4366 * modified before passing it to the handler.
4367 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004368 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004369 in = ctxt->input->cur;
4370 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004371get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004372 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004373 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004374 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004375 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004376 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004377 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004378 goto get_more_space;
4379 }
4380 if (*in == '<') {
4381 nbchar = in - ctxt->input->cur;
4382 if (nbchar > 0) {
4383 const xmlChar *tmp = ctxt->input->cur;
4384 ctxt->input->cur = in;
4385
Daniel Veillard34099b42004-11-04 17:34:35 +00004386 if ((ctxt->sax != NULL) &&
4387 (ctxt->sax->ignorableWhitespace !=
4388 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004389 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004390 if (ctxt->sax->ignorableWhitespace != NULL)
4391 ctxt->sax->ignorableWhitespace(ctxt->userData,
4392 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004393 } else {
4394 if (ctxt->sax->characters != NULL)
4395 ctxt->sax->characters(ctxt->userData,
4396 tmp, nbchar);
4397 if (*ctxt->space == -1)
4398 *ctxt->space = -2;
4399 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004400 } else if ((ctxt->sax != NULL) &&
4401 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004402 ctxt->sax->characters(ctxt->userData,
4403 tmp, nbchar);
4404 }
4405 }
4406 return;
4407 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004408
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004409get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004410 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004411 while (test_char_data[*in]) {
4412 in++;
4413 ccol++;
4414 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004415 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004416 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004417 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004418 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004419 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004420 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004421 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004422 }
4423 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004424 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004425 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Nick Wellnhofer52ceced2017-07-01 17:49:30 +02004426 ctxt->input->cur = in + 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004427 return;
4428 }
4429 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004430 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004431 goto get_more;
4432 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004433 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004434 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004435 if ((ctxt->sax != NULL) &&
4436 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004437 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004438 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004439 const xmlChar *tmp = ctxt->input->cur;
4440 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004441
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004442 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004443 if (ctxt->sax->ignorableWhitespace != NULL)
4444 ctxt->sax->ignorableWhitespace(ctxt->userData,
4445 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004446 } else {
4447 if (ctxt->sax->characters != NULL)
4448 ctxt->sax->characters(ctxt->userData,
4449 tmp, nbchar);
4450 if (*ctxt->space == -1)
4451 *ctxt->space = -2;
4452 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004453 line = ctxt->input->line;
4454 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004455 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004456 if (ctxt->sax->characters != NULL)
4457 ctxt->sax->characters(ctxt->userData,
4458 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004459 line = ctxt->input->line;
4460 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004461 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004462 /* something really bad happened in the SAX callback */
4463 if (ctxt->instate != XML_PARSER_CONTENT)
4464 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004465 }
4466 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004467 if (*in == 0xD) {
4468 in++;
4469 if (*in == 0xA) {
4470 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004471 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004472 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004473 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004474 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004475 in--;
4476 }
4477 if (*in == '<') {
4478 return;
4479 }
4480 if (*in == '&') {
4481 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004482 }
4483 SHRINK;
4484 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004485 if (ctxt->instate == XML_PARSER_EOF)
4486 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004487 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004488 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004489 nbchar = 0;
4490 }
Daniel Veillard50582112001-03-26 22:52:16 +00004491 ctxt->input->line = line;
4492 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004493 xmlParseCharDataComplex(ctxt, cdata);
4494}
4495
Daniel Veillard01c13b52002-12-10 15:19:08 +00004496/**
4497 * xmlParseCharDataComplex:
4498 * @ctxt: an XML parser context
4499 * @cdata: int indicating whether we are within a CDATA section
4500 *
4501 * parse a CharData section.this is the fallback function
4502 * of xmlParseCharData() when the parsing requires handling
4503 * of non-ASCII characters.
4504 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004505static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004506xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004507 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4508 int nbchar = 0;
4509 int cur, l;
4510 int count = 0;
4511
4512 SHRINK;
4513 GROW;
4514 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004515 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004516 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004517 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004518 if ((cur == ']') && (NXT(1) == ']') &&
4519 (NXT(2) == '>')) {
4520 if (cdata) break;
4521 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004522 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004523 }
4524 }
4525 COPY_BUF(l,buf,nbchar,cur);
4526 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004527 buf[nbchar] = 0;
4528
Owen Taylor3473f882001-02-23 17:55:21 +00004529 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004530 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004531 */
4532 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004533 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004534 if (ctxt->sax->ignorableWhitespace != NULL)
4535 ctxt->sax->ignorableWhitespace(ctxt->userData,
4536 buf, nbchar);
4537 } else {
4538 if (ctxt->sax->characters != NULL)
4539 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004540 if ((ctxt->sax->characters !=
4541 ctxt->sax->ignorableWhitespace) &&
4542 (*ctxt->space == -1))
4543 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004544 }
4545 }
4546 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004547 /* something really bad happened in the SAX callback */
4548 if (ctxt->instate != XML_PARSER_CONTENT)
4549 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004550 }
4551 count++;
4552 if (count > 50) {
4553 GROW;
4554 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004555 if (ctxt->instate == XML_PARSER_EOF)
4556 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004557 }
4558 NEXTL(l);
4559 cur = CUR_CHAR(l);
4560 }
4561 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004562 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004563 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004564 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004565 */
4566 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004567 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004568 if (ctxt->sax->ignorableWhitespace != NULL)
4569 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4570 } else {
4571 if (ctxt->sax->characters != NULL)
4572 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004573 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4574 (*ctxt->space == -1))
4575 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004576 }
4577 }
4578 }
Nick Wellnhofer69936b12017-08-30 14:16:01 +02004579 if ((cur != 0) && (!IS_CHAR(cur))) {
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004580 /* Generate the error and skip the offending character */
4581 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4582 "PCDATA invalid Char value %d\n",
4583 cur);
4584 NEXTL(l);
4585 }
Owen Taylor3473f882001-02-23 17:55:21 +00004586}
4587
4588/**
4589 * xmlParseExternalID:
4590 * @ctxt: an XML parser context
4591 * @publicID: a xmlChar** receiving PubidLiteral
4592 * @strict: indicate whether we should restrict parsing to only
4593 * production [75], see NOTE below
4594 *
4595 * Parse an External ID or a Public ID
4596 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004597 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004598 * 'PUBLIC' S PubidLiteral S SystemLiteral
4599 *
4600 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4601 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4602 *
4603 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4604 *
4605 * Returns the function returns SystemLiteral and in the second
4606 * case publicID receives PubidLiteral, is strict is off
4607 * it is possible to return NULL and have publicID set.
4608 */
4609
4610xmlChar *
4611xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4612 xmlChar *URI = NULL;
4613
4614 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004615
4616 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004617 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004618 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004619 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004620 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4621 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004622 }
Owen Taylor3473f882001-02-23 17:55:21 +00004623 URI = xmlParseSystemLiteral(ctxt);
4624 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004625 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004626 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004627 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004628 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004629 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004630 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004631 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004632 }
Owen Taylor3473f882001-02-23 17:55:21 +00004633 *publicID = xmlParsePubidLiteral(ctxt);
4634 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004635 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004636 }
4637 if (strict) {
4638 /*
4639 * We don't handle [83] so "S SystemLiteral" is required.
4640 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004641 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004642 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004643 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004644 }
4645 } else {
4646 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004647 * We handle [83] so we return immediately, if
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004648 * "S SystemLiteral" is not detected. We skip blanks if no
4649 * system literal was found, but this is harmless since we must
4650 * be at the end of a NotationDecl.
Owen Taylor3473f882001-02-23 17:55:21 +00004651 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004652 if (SKIP_BLANKS == 0) return(NULL);
4653 if ((CUR != '\'') && (CUR != '"')) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004654 }
Owen Taylor3473f882001-02-23 17:55:21 +00004655 URI = xmlParseSystemLiteral(ctxt);
4656 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004657 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004658 }
4659 }
4660 return(URI);
4661}
4662
4663/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004664 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004665 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004666 * @buf: the already parsed part of the buffer
4667 * @len: number of bytes filles in the buffer
4668 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004669 *
4670 * Skip an XML (SGML) comment <!-- .... -->
4671 * The spec says that "For compatibility, the string "--" (double-hyphen)
4672 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004673 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004674 *
4675 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4676 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004677static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004678xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4679 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004680 int q, ql;
4681 int r, rl;
4682 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004683 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004684 int inputid;
4685
4686 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004687
Owen Taylor3473f882001-02-23 17:55:21 +00004688 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004689 len = 0;
4690 size = XML_PARSER_BUFFER_SIZE;
4691 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4692 if (buf == NULL) {
4693 xmlErrMemory(ctxt, NULL);
4694 return;
4695 }
Owen Taylor3473f882001-02-23 17:55:21 +00004696 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004697 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004698 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004699 if (q == 0)
4700 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004701 if (!IS_CHAR(q)) {
4702 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4703 "xmlParseComment: invalid xmlChar value %d\n",
4704 q);
4705 xmlFree (buf);
4706 return;
4707 }
Owen Taylor3473f882001-02-23 17:55:21 +00004708 NEXTL(ql);
4709 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004710 if (r == 0)
4711 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004712 if (!IS_CHAR(r)) {
4713 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4714 "xmlParseComment: invalid xmlChar value %d\n",
4715 q);
4716 xmlFree (buf);
4717 return;
4718 }
Owen Taylor3473f882001-02-23 17:55:21 +00004719 NEXTL(rl);
4720 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004721 if (cur == 0)
4722 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004723 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004724 ((cur != '>') ||
4725 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004726 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004727 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004728 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004729 if ((len > XML_MAX_TEXT_LENGTH) &&
4730 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4731 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4732 "Comment too big found", NULL);
4733 xmlFree (buf);
4734 return;
4735 }
Owen Taylor3473f882001-02-23 17:55:21 +00004736 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004737 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004738 size_t new_size;
4739
4740 new_size = size * 2;
4741 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004742 if (new_buf == NULL) {
4743 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004744 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004745 return;
4746 }
William M. Bracka3215c72004-07-31 16:24:01 +00004747 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004748 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004749 }
4750 COPY_BUF(ql,buf,len,q);
4751 q = r;
4752 ql = rl;
4753 r = cur;
4754 rl = l;
4755
4756 count++;
4757 if (count > 50) {
4758 GROW;
4759 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004760 if (ctxt->instate == XML_PARSER_EOF) {
4761 xmlFree(buf);
4762 return;
4763 }
Owen Taylor3473f882001-02-23 17:55:21 +00004764 }
4765 NEXTL(l);
4766 cur = CUR_CHAR(l);
4767 if (cur == 0) {
4768 SHRINK;
4769 GROW;
4770 cur = CUR_CHAR(l);
4771 }
4772 }
4773 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004774 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004775 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004776 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004777 } else if (!IS_CHAR(cur)) {
4778 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4779 "xmlParseComment: invalid xmlChar value %d\n",
4780 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004781 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004782 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004783 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004784 "Comment doesn't start and stop in the same"
4785 " entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004786 }
4787 NEXT;
4788 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4789 (!ctxt->disableSAX))
4790 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004791 }
Daniel Veillardda629342007-08-01 07:49:06 +00004792 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004793 return;
4794not_terminated:
4795 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4796 "Comment not terminated\n", NULL);
4797 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004798 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004799}
Daniel Veillardda629342007-08-01 07:49:06 +00004800
Daniel Veillard4c778d82005-01-23 17:37:44 +00004801/**
4802 * xmlParseComment:
4803 * @ctxt: an XML parser context
4804 *
4805 * Skip an XML (SGML) comment <!-- .... -->
4806 * The spec says that "For compatibility, the string "--" (double-hyphen)
4807 * must not occur within comments. "
4808 *
4809 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4810 */
4811void
4812xmlParseComment(xmlParserCtxtPtr ctxt) {
4813 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004814 size_t size = XML_PARSER_BUFFER_SIZE;
4815 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004816 xmlParserInputState state;
4817 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004818 size_t nbchar = 0;
4819 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004820 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004821
4822 /*
4823 * Check that there is a comment right here.
4824 */
4825 if ((RAW != '<') || (NXT(1) != '!') ||
4826 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004827 state = ctxt->instate;
4828 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004829 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004830 SKIP(4);
4831 SHRINK;
4832 GROW;
4833
4834 /*
4835 * Accelerated common case where input don't need to be
4836 * modified before passing it to the handler.
4837 */
4838 in = ctxt->input->cur;
4839 do {
4840 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004841 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004842 ctxt->input->line++; ctxt->input->col = 1;
4843 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004844 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004845 }
4846get_more:
4847 ccol = ctxt->input->col;
4848 while (((*in > '-') && (*in <= 0x7F)) ||
4849 ((*in >= 0x20) && (*in < '-')) ||
4850 (*in == 0x09)) {
4851 in++;
4852 ccol++;
4853 }
4854 ctxt->input->col = ccol;
4855 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004856 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004857 ctxt->input->line++; ctxt->input->col = 1;
4858 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004859 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004860 goto get_more;
4861 }
4862 nbchar = in - ctxt->input->cur;
4863 /*
4864 * save current set of data
4865 */
4866 if (nbchar > 0) {
4867 if ((ctxt->sax != NULL) &&
4868 (ctxt->sax->comment != NULL)) {
4869 if (buf == NULL) {
4870 if ((*in == '-') && (in[1] == '-'))
4871 size = nbchar + 1;
4872 else
4873 size = XML_PARSER_BUFFER_SIZE + nbchar;
4874 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4875 if (buf == NULL) {
4876 xmlErrMemory(ctxt, NULL);
4877 ctxt->instate = state;
4878 return;
4879 }
4880 len = 0;
4881 } else if (len + nbchar + 1 >= size) {
4882 xmlChar *new_buf;
4883 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4884 new_buf = (xmlChar *) xmlRealloc(buf,
4885 size * sizeof(xmlChar));
4886 if (new_buf == NULL) {
4887 xmlFree (buf);
4888 xmlErrMemory(ctxt, NULL);
4889 ctxt->instate = state;
4890 return;
4891 }
4892 buf = new_buf;
4893 }
4894 memcpy(&buf[len], ctxt->input->cur, nbchar);
4895 len += nbchar;
4896 buf[len] = 0;
4897 }
4898 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004899 if ((len > XML_MAX_TEXT_LENGTH) &&
4900 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4901 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902 "Comment too big found", NULL);
4903 xmlFree (buf);
4904 return;
4905 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004906 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004907 if (*in == 0xA) {
4908 in++;
4909 ctxt->input->line++; ctxt->input->col = 1;
4910 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004911 if (*in == 0xD) {
4912 in++;
4913 if (*in == 0xA) {
4914 ctxt->input->cur = in;
4915 in++;
4916 ctxt->input->line++; ctxt->input->col = 1;
4917 continue; /* while */
4918 }
4919 in--;
4920 }
4921 SHRINK;
4922 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004923 if (ctxt->instate == XML_PARSER_EOF) {
4924 xmlFree(buf);
4925 return;
4926 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004927 in = ctxt->input->cur;
4928 if (*in == '-') {
4929 if (in[1] == '-') {
4930 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004931 if (ctxt->input->id != inputid) {
4932 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004933 "comment doesn't start and stop in the"
4934 " same entity\n");
Daniel Veillard051d52c2008-07-29 16:44:59 +00004935 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004936 SKIP(3);
4937 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4938 (!ctxt->disableSAX)) {
4939 if (buf != NULL)
4940 ctxt->sax->comment(ctxt->userData, buf);
4941 else
4942 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4943 }
4944 if (buf != NULL)
4945 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08004946 if (ctxt->instate != XML_PARSER_EOF)
4947 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004948 return;
4949 }
Bryan Henderson8658d272012-05-08 16:39:05 +08004950 if (buf != NULL) {
4951 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4952 "Double hyphen within comment: "
4953 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00004954 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08004955 } else
4956 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4957 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004958 in++;
4959 ctxt->input->col++;
4960 }
4961 in++;
4962 ctxt->input->col++;
4963 goto get_more;
4964 }
4965 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4966 xmlParseCommentComplex(ctxt, buf, len, size);
4967 ctxt->instate = state;
4968 return;
4969}
4970
Owen Taylor3473f882001-02-23 17:55:21 +00004971
4972/**
4973 * xmlParsePITarget:
4974 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004975 *
Owen Taylor3473f882001-02-23 17:55:21 +00004976 * parse the name of a PI
4977 *
4978 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4979 *
4980 * Returns the PITarget name or NULL
4981 */
4982
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004983const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004984xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004985 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004986
4987 name = xmlParseName(ctxt);
4988 if ((name != NULL) &&
4989 ((name[0] == 'x') || (name[0] == 'X')) &&
4990 ((name[1] == 'm') || (name[1] == 'M')) &&
4991 ((name[2] == 'l') || (name[2] == 'L'))) {
4992 int i;
4993 if ((name[0] == 'x') && (name[1] == 'm') &&
4994 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004995 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004996 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004997 return(name);
4998 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004999 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005000 return(name);
5001 }
5002 for (i = 0;;i++) {
5003 if (xmlW3CPIs[i] == NULL) break;
5004 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5005 return(name);
5006 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005007 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5008 "xmlParsePITarget: invalid name prefix 'xml'\n",
5009 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005010 }
Daniel Veillard37334572008-07-31 08:20:02 +00005011 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005012 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005013 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005014 }
Owen Taylor3473f882001-02-23 17:55:21 +00005015 return(name);
5016}
5017
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must be: catalog, optional blanks, '=', optional blanks, a
 * quoted URL, then only blanks. A value with no '=' after "catalog" is
 * ignored silently; any other mismatch emits a warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* Collect everything up to the matching quote. */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5079
Owen Taylor3473f882001-02-23 17:55:21 +00005080/**
5081 * xmlParsePI:
5082 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005083 *
Owen Taylor3473f882001-02-23 17:55:21 +00005084 * parse an XML Processing Instruction.
5085 *
5086 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5087 *
 * The processing is transferred to SAX once parsed.
5089 */
5090
5091void
5092xmlParsePI(xmlParserCtxtPtr ctxt) {
5093 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005094 size_t len = 0;
5095 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005096 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005097 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005098 xmlParserInputState state;
5099 int count = 0;
5100
5101 if ((RAW == '<') && (NXT(1) == '?')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005102 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005103 state = ctxt->instate;
5104 ctxt->instate = XML_PARSER_PI;
5105 /*
5106 * this is a Processing Instruction.
5107 */
5108 SKIP(2);
5109 SHRINK;
5110
5111 /*
5112 * Parse the target name and check for special support like
5113 * namespace.
5114 */
5115 target = xmlParsePITarget(ctxt);
5116 if (target != NULL) {
5117 if ((RAW == '?') && (NXT(1) == '>')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005118 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005119 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005120 "PI declaration doesn't start and stop in"
5121 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005122 }
5123 SKIP(2);
5124
5125 /*
5126 * SAX: PI detected.
5127 */
5128 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5129 (ctxt->sax->processingInstruction != NULL))
5130 ctxt->sax->processingInstruction(ctxt->userData,
5131 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005132 if (ctxt->instate != XML_PARSER_EOF)
5133 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005134 return;
5135 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005136 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005137 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005138 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005139 ctxt->instate = state;
5140 return;
5141 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005142 if (SKIP_BLANKS == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005143 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5144 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005145 }
Owen Taylor3473f882001-02-23 17:55:21 +00005146 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005147 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005148 ((cur != '?') || (NXT(1) != '>'))) {
5149 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005150 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005151 size_t new_size = size * 2;
5152 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005153 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005154 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005155 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005156 ctxt->instate = state;
5157 return;
5158 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005159 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005160 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005161 }
5162 count++;
5163 if (count > 50) {
5164 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005165 if (ctxt->instate == XML_PARSER_EOF) {
5166 xmlFree(buf);
5167 return;
5168 }
Owen Taylor3473f882001-02-23 17:55:21 +00005169 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005170 if ((len > XML_MAX_TEXT_LENGTH) &&
5171 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5172 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5173 "PI %s too big found", target);
5174 xmlFree(buf);
5175 ctxt->instate = state;
5176 return;
5177 }
Owen Taylor3473f882001-02-23 17:55:21 +00005178 }
5179 COPY_BUF(l,buf,len,cur);
5180 NEXTL(l);
5181 cur = CUR_CHAR(l);
5182 if (cur == 0) {
5183 SHRINK;
5184 GROW;
5185 cur = CUR_CHAR(l);
5186 }
5187 }
Daniel Veillard51304812012-07-19 20:34:26 +08005188 if ((len > XML_MAX_TEXT_LENGTH) &&
5189 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5190 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5191 "PI %s too big found", target);
5192 xmlFree(buf);
5193 ctxt->instate = state;
5194 return;
5195 }
Owen Taylor3473f882001-02-23 17:55:21 +00005196 buf[len] = 0;
5197 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005198 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5199 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005200 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005201 if (inputid != ctxt->input->id) {
5202 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5203 "PI declaration doesn't start and stop in"
5204 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005205 }
5206 SKIP(2);
5207
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005208#ifdef LIBXML_CATALOG_ENABLED
5209 if (((state == XML_PARSER_MISC) ||
5210 (state == XML_PARSER_START)) &&
5211 (xmlStrEqual(target, XML_CATALOG_PI))) {
5212 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5213 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5214 (allow == XML_CATA_ALLOW_ALL))
5215 xmlParseCatalogPI(ctxt, buf);
5216 }
5217#endif
5218
5219
Owen Taylor3473f882001-02-23 17:55:21 +00005220 /*
5221 * SAX: PI detected.
5222 */
5223 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5224 (ctxt->sax->processingInstruction != NULL))
5225 ctxt->sax->processingInstruction(ctxt->userData,
5226 target, buf);
5227 }
5228 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005229 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005230 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005231 }
Chris Evans77404b82011-12-14 16:18:25 +08005232 if (ctxt->instate != XML_PARSER_EOF)
5233 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005234 }
5235}
5236
5237/**
5238 * xmlParseNotationDecl:
5239 * @ctxt: an XML parser context
5240 *
5241 * parse a notation declaration
5242 *
5243 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5244 *
5245 * Hence there is actually 3 choices:
5246 * 'PUBLIC' S PubidLiteral
5247 * 'PUBLIC' S PubidLiteral S SystemLiteral
5248 * and 'SYSTEM' S SystemLiteral
5249 *
5250 * See the NOTE on xmlParseExternalID().
5251 */
5252
5253void
5254xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005255 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005256 xmlChar *Pubid;
5257 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005258
Daniel Veillarda07050d2003-10-19 14:46:32 +00005259 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005260 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005261 SHRINK;
5262 SKIP(10);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005263 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005264 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5265 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005266 return;
5267 }
Owen Taylor3473f882001-02-23 17:55:21 +00005268
Daniel Veillard76d66f42001-05-16 21:05:17 +00005269 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005270 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005271 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005272 return;
5273 }
Daniel Veillard37334572008-07-31 08:20:02 +00005274 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005275 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005276 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005277 name, NULL, NULL);
5278 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005279 if (SKIP_BLANKS == 0) {
5280 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5281 "Space required after the NOTATION name'\n");
5282 return;
5283 }
Owen Taylor3473f882001-02-23 17:55:21 +00005284
5285 /*
5286 * Parse the IDs.
5287 */
5288 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5289 SKIP_BLANKS;
5290
5291 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005292 if (inputid != ctxt->input->id) {
5293 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5294 "Notation declaration doesn't start and stop"
5295 " in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005296 }
5297 NEXT;
5298 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5299 (ctxt->sax->notationDecl != NULL))
5300 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5301 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005302 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005303 }
Owen Taylor3473f882001-02-23 17:55:21 +00005304 if (Systemid != NULL) xmlFree(Systemid);
5305 if (Pubid != NULL) xmlFree(Pubid);
5306 }
5307}
5308
5309/**
5310 * xmlParseEntityDecl:
5311 * @ctxt: an XML parser context
5312 *
5313 * parse <!ENTITY declarations
5314 *
5315 * [70] EntityDecl ::= GEDecl | PEDecl
5316 *
5317 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5318 *
5319 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5320 *
5321 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5322 *
5323 * [74] PEDef ::= EntityValue | ExternalID
5324 *
5325 * [76] NDataDecl ::= S 'NDATA' S Name
5326 *
5327 * [ VC: Notation Declared ]
5328 * The Name must match the declared name of a notation.
5329 */
5330
5331void
5332xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005333 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005334 xmlChar *value = NULL;
5335 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005336 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005337 int isParameter = 0;
5338 xmlChar *orig = NULL;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005339
Daniel Veillard4c778d82005-01-23 17:37:44 +00005340 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005341 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005342 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005343 SHRINK;
5344 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005345 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005346 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5347 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005348 }
Owen Taylor3473f882001-02-23 17:55:21 +00005349
5350 if (RAW == '%') {
5351 NEXT;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005352 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005353 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
David Kilzer4472c3a2016-05-13 15:13:17 +08005354 "Space required after '%%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005355 }
Owen Taylor3473f882001-02-23 17:55:21 +00005356 isParameter = 1;
5357 }
5358
Daniel Veillard76d66f42001-05-16 21:05:17 +00005359 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005360 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005361 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5362 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005363 return;
5364 }
Daniel Veillard37334572008-07-31 08:20:02 +00005365 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005366 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005367 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005368 name, NULL, NULL);
5369 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005370 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005371 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5372 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005373 }
Owen Taylor3473f882001-02-23 17:55:21 +00005374
Daniel Veillardf5582f12002-06-11 10:08:16 +00005375 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005376 /*
5377 * handle the various case of definitions...
5378 */
5379 if (isParameter) {
5380 if ((RAW == '"') || (RAW == '\'')) {
5381 value = xmlParseEntityValue(ctxt, &orig);
5382 if (value) {
5383 if ((ctxt->sax != NULL) &&
5384 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5385 ctxt->sax->entityDecl(ctxt->userData, name,
5386 XML_INTERNAL_PARAMETER_ENTITY,
5387 NULL, NULL, value);
5388 }
5389 } else {
5390 URI = xmlParseExternalID(ctxt, &literal, 1);
5391 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005392 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005393 }
5394 if (URI) {
5395 xmlURIPtr uri;
5396
5397 uri = xmlParseURI((const char *) URI);
5398 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005399 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5400 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005401 /*
5402 * This really ought to be a well formedness error
5403 * but the XML Core WG decided otherwise c.f. issue
5404 * E26 of the XML erratas.
5405 */
Owen Taylor3473f882001-02-23 17:55:21 +00005406 } else {
5407 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005408 /*
5409 * Okay this is foolish to block those but not
5410 * invalid URIs.
5411 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005412 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005413 } else {
5414 if ((ctxt->sax != NULL) &&
5415 (!ctxt->disableSAX) &&
5416 (ctxt->sax->entityDecl != NULL))
5417 ctxt->sax->entityDecl(ctxt->userData, name,
5418 XML_EXTERNAL_PARAMETER_ENTITY,
5419 literal, URI, NULL);
5420 }
5421 xmlFreeURI(uri);
5422 }
5423 }
5424 }
5425 } else {
5426 if ((RAW == '"') || (RAW == '\'')) {
5427 value = xmlParseEntityValue(ctxt, &orig);
5428 if ((ctxt->sax != NULL) &&
5429 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5430 ctxt->sax->entityDecl(ctxt->userData, name,
5431 XML_INTERNAL_GENERAL_ENTITY,
5432 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005433 /*
5434 * For expat compatibility in SAX mode.
5435 */
5436 if ((ctxt->myDoc == NULL) ||
5437 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5438 if (ctxt->myDoc == NULL) {
5439 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005440 if (ctxt->myDoc == NULL) {
5441 xmlErrMemory(ctxt, "New Doc failed");
5442 return;
5443 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005444 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005445 }
5446 if (ctxt->myDoc->intSubset == NULL)
5447 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5448 BAD_CAST "fake", NULL, NULL);
5449
Daniel Veillard1af9a412003-08-20 22:54:39 +00005450 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5451 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005452 }
Owen Taylor3473f882001-02-23 17:55:21 +00005453 } else {
5454 URI = xmlParseExternalID(ctxt, &literal, 1);
5455 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005456 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005457 }
5458 if (URI) {
5459 xmlURIPtr uri;
5460
5461 uri = xmlParseURI((const char *)URI);
5462 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005463 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5464 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005465 /*
5466 * This really ought to be a well formedness error
5467 * but the XML Core WG decided otherwise c.f. issue
5468 * E26 of the XML erratas.
5469 */
Owen Taylor3473f882001-02-23 17:55:21 +00005470 } else {
5471 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005472 /*
5473 * Okay this is foolish to block those but not
5474 * invalid URIs.
5475 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005476 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005477 }
5478 xmlFreeURI(uri);
5479 }
5480 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005481 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005482 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5483 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005484 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005485 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005486 SKIP(5);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005487 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5489 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005490 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005491 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005492 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5493 (ctxt->sax->unparsedEntityDecl != NULL))
5494 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5495 literal, URI, ndata);
5496 } else {
5497 if ((ctxt->sax != NULL) &&
5498 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5499 ctxt->sax->entityDecl(ctxt->userData, name,
5500 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5501 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005502 /*
5503 * For expat compatibility in SAX mode.
5504 * assuming the entity repalcement was asked for
5505 */
5506 if ((ctxt->replaceEntities != 0) &&
5507 ((ctxt->myDoc == NULL) ||
5508 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5509 if (ctxt->myDoc == NULL) {
5510 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005511 if (ctxt->myDoc == NULL) {
5512 xmlErrMemory(ctxt, "New Doc failed");
5513 return;
5514 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005515 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005516 }
5517
5518 if (ctxt->myDoc->intSubset == NULL)
5519 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5520 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005521 xmlSAX2EntityDecl(ctxt, name,
5522 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5523 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005524 }
Owen Taylor3473f882001-02-23 17:55:21 +00005525 }
5526 }
5527 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005528 if (ctxt->instate == XML_PARSER_EOF)
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005529 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00005530 SKIP_BLANKS;
5531 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005532 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005533 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005534 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005535 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005536 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005537 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005538 "Entity declaration doesn't start and stop in"
5539 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005540 }
5541 NEXT;
5542 }
5543 if (orig != NULL) {
5544 /*
5545 * Ugly mechanism to save the raw entity value.
5546 */
5547 xmlEntityPtr cur = NULL;
5548
5549 if (isParameter) {
5550 if ((ctxt->sax != NULL) &&
5551 (ctxt->sax->getParameterEntity != NULL))
5552 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5553 } else {
5554 if ((ctxt->sax != NULL) &&
5555 (ctxt->sax->getEntity != NULL))
5556 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005557 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005558 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005559 }
Owen Taylor3473f882001-02-23 17:55:21 +00005560 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005561 if ((cur != NULL) && (cur->orig == NULL)) {
5562 cur->orig = orig;
5563 orig = NULL;
5564 }
Owen Taylor3473f882001-02-23 17:55:21 +00005565 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005566
5567done:
Owen Taylor3473f882001-02-23 17:55:21 +00005568 if (value != NULL) xmlFree(value);
5569 if (URI != NULL) xmlFree(URI);
5570 if (literal != NULL) xmlFree(literal);
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005571 if (orig != NULL) xmlFree(orig);
Owen Taylor3473f882001-02-23 17:55:21 +00005572 }
5573}
5574
5575/**
5576 * xmlParseDefaultDecl:
5577 * @ctxt: an XML parser context
5578 * @value: Receive a possible fixed default value for the attribute
5579 *
5580 * Parse an attribute default declaration
5581 *
5582 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5583 *
5584 * [ VC: Required Attribute ]
5585 * if the default declaration is the keyword #REQUIRED, then the
5586 * attribute must be specified for all elements of the type in the
5587 * attribute-list declaration.
5588 *
5589 * [ VC: Attribute Default Legal ]
5590 * The declared default value must meet the lexical constraints of
5591 * the declared attribute type c.f. xmlValidateAttributeDecl()
5592 *
5593 * [ VC: Fixed Attribute Default ]
5594 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005595 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005596 *
5597 * [ WFC: No < in Attribute Values ]
5598 * handled in xmlParseAttValue()
5599 *
5600 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005601 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005602 */
5603
5604int
5605xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5606 int val;
5607 xmlChar *ret;
5608
5609 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005610 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005611 SKIP(9);
5612 return(XML_ATTRIBUTE_REQUIRED);
5613 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005614 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005615 SKIP(8);
5616 return(XML_ATTRIBUTE_IMPLIED);
5617 }
5618 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005619 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005620 SKIP(6);
5621 val = XML_ATTRIBUTE_FIXED;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005622 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005623 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5624 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005625 }
Owen Taylor3473f882001-02-23 17:55:21 +00005626 }
5627 ret = xmlParseAttValue(ctxt);
5628 ctxt->instate = XML_PARSER_DTD;
5629 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005630 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005631 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005632 } else
5633 *value = ret;
5634 return(val);
5635}
5636
5637/**
5638 * xmlParseNotationType:
5639 * @ctxt: an XML parser context
5640 *
5641 * parse an Notation attribute type.
5642 *
5643 * Note: the leading 'NOTATION' S part has already being parsed...
5644 *
5645 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5646 *
5647 * [ VC: Notation Attributes ]
5648 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005649 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005650 *
5651 * Returns: the notation attribute tree built while parsing
5652 */
5653
5654xmlEnumerationPtr
5655xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005656 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005657 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005658
5659 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005660 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005661 return(NULL);
5662 }
5663 SHRINK;
5664 do {
5665 NEXT;
5666 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005667 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005668 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005669 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5670 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005671 xmlFreeEnumeration(ret);
5672 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005673 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005674 tmp = ret;
5675 while (tmp != NULL) {
5676 if (xmlStrEqual(name, tmp->name)) {
5677 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5678 "standalone: attribute notation value token %s duplicated\n",
5679 name, NULL);
5680 if (!xmlDictOwns(ctxt->dict, name))
5681 xmlFree((xmlChar *) name);
5682 break;
5683 }
5684 tmp = tmp->next;
5685 }
5686 if (tmp == NULL) {
5687 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005688 if (cur == NULL) {
5689 xmlFreeEnumeration(ret);
5690 return(NULL);
5691 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005692 if (last == NULL) ret = last = cur;
5693 else {
5694 last->next = cur;
5695 last = cur;
5696 }
Owen Taylor3473f882001-02-23 17:55:21 +00005697 }
5698 SKIP_BLANKS;
5699 } while (RAW == '|');
5700 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005701 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005702 xmlFreeEnumeration(ret);
5703 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005704 }
5705 NEXT;
5706 return(ret);
5707}
5708
5709/**
5710 * xmlParseEnumerationType:
5711 * @ctxt: an XML parser context
5712 *
5713 * parse an Enumeration attribute type.
5714 *
5715 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5716 *
5717 * [ VC: Enumeration ]
5718 * Values of this type must match one of the Nmtoken tokens in
5719 * the declaration
5720 *
5721 * Returns: the enumeration attribute tree built while parsing
5722 */
5723
5724xmlEnumerationPtr
5725xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5726 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005727 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005728
5729 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005730 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005731 return(NULL);
5732 }
5733 SHRINK;
5734 do {
5735 NEXT;
5736 SKIP_BLANKS;
5737 name = xmlParseNmtoken(ctxt);
5738 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005739 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005740 return(ret);
5741 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005742 tmp = ret;
5743 while (tmp != NULL) {
5744 if (xmlStrEqual(name, tmp->name)) {
5745 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5746 "standalone: attribute enumeration value token %s duplicated\n",
5747 name, NULL);
5748 if (!xmlDictOwns(ctxt->dict, name))
5749 xmlFree(name);
5750 break;
5751 }
5752 tmp = tmp->next;
5753 }
5754 if (tmp == NULL) {
5755 cur = xmlCreateEnumeration(name);
5756 if (!xmlDictOwns(ctxt->dict, name))
5757 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005758 if (cur == NULL) {
5759 xmlFreeEnumeration(ret);
5760 return(NULL);
5761 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005762 if (last == NULL) ret = last = cur;
5763 else {
5764 last->next = cur;
5765 last = cur;
5766 }
Owen Taylor3473f882001-02-23 17:55:21 +00005767 }
5768 SKIP_BLANKS;
5769 } while (RAW == '|');
5770 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005771 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005772 return(ret);
5773 }
5774 NEXT;
5775 return(ret);
5776}
5777
5778/**
5779 * xmlParseEnumeratedType:
5780 * @ctxt: an XML parser context
5781 * @tree: the enumeration tree built while parsing
5782 *
5783 * parse an Enumerated attribute type.
5784 *
5785 * [57] EnumeratedType ::= NotationType | Enumeration
5786 *
5787 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5788 *
5789 *
5790 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5791 */
5792
5793int
5794xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005795 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005796 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005797 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005798 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5799 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005800 return(0);
5801 }
Owen Taylor3473f882001-02-23 17:55:21 +00005802 *tree = xmlParseNotationType(ctxt);
5803 if (*tree == NULL) return(0);
5804 return(XML_ATTRIBUTE_NOTATION);
5805 }
5806 *tree = xmlParseEnumerationType(ctxt);
5807 if (*tree == NULL) return(0);
5808 return(XML_ATTRIBUTE_ENUMERATION);
5809}
5810
5811/**
5812 * xmlParseAttributeType:
5813 * @ctxt: an XML parser context
5814 * @tree: the enumeration tree built while parsing
5815 *
5816 * parse the Attribute list def for an element
5817 *
5818 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5819 *
5820 * [55] StringType ::= 'CDATA'
5821 *
5822 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5823 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5824 *
5825 * Validity constraints for attribute values syntax are checked in
5826 * xmlValidateAttributeValue()
5827 *
5828 * [ VC: ID ]
5829 * Values of type ID must match the Name production. A name must not
5830 * appear more than once in an XML document as a value of this type;
5831 * i.e., ID values must uniquely identify the elements which bear them.
5832 *
5833 * [ VC: One ID per Element Type ]
5834 * No element type may have more than one ID attribute specified.
5835 *
5836 * [ VC: ID Attribute Default ]
5837 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5838 *
5839 * [ VC: IDREF ]
5840 * Values of type IDREF must match the Name production, and values
5841 * of type IDREFS must match Names; each IDREF Name must match the value
5842 * of an ID attribute on some element in the XML document; i.e. IDREF
5843 * values must match the value of some ID attribute.
5844 *
5845 * [ VC: Entity Name ]
5846 * Values of type ENTITY must match the Name production, values
5847 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005848 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005849 *
5850 * [ VC: Name Token ]
5851 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005852 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005853 *
5854 * Returns the attribute type
5855 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005856int
Owen Taylor3473f882001-02-23 17:55:21 +00005857xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5858 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005859 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005860 SKIP(5);
5861 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005862 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005863 SKIP(6);
5864 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005865 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005866 SKIP(5);
5867 return(XML_ATTRIBUTE_IDREF);
5868 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5869 SKIP(2);
5870 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005871 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005872 SKIP(6);
5873 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005874 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005875 SKIP(8);
5876 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005877 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005878 SKIP(8);
5879 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005880 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005881 SKIP(7);
5882 return(XML_ATTRIBUTE_NMTOKEN);
5883 }
5884 return(xmlParseEnumeratedType(ctxt, tree));
5885}
5886
5887/**
5888 * xmlParseAttributeListDecl:
5889 * @ctxt: an XML parser context
5890 *
5891 * : parse the Attribute list def for an element
5892 *
5893 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5894 *
5895 * [53] AttDef ::= S Name S AttType S DefaultDecl
5896 *
5897 */
5898void
5899xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005900 const xmlChar *elemName;
5901 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005902 xmlEnumerationPtr tree;
5903
Daniel Veillarda07050d2003-10-19 14:46:32 +00005904 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005905 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005906
5907 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005908 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005909 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005910 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005911 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005912 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005913 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005914 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5915 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005916 return;
5917 }
5918 SKIP_BLANKS;
5919 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005920 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005921 int type;
5922 int def;
5923 xmlChar *defaultValue = NULL;
5924
5925 GROW;
5926 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005927 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005928 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005929 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5930 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005931 break;
5932 }
5933 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005934 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005935 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005936 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005937 break;
5938 }
Owen Taylor3473f882001-02-23 17:55:21 +00005939
5940 type = xmlParseAttributeType(ctxt, &tree);
5941 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005942 break;
5943 }
5944
5945 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005946 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005947 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5948 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005949 if (tree != NULL)
5950 xmlFreeEnumeration(tree);
5951 break;
5952 }
Owen Taylor3473f882001-02-23 17:55:21 +00005953
5954 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5955 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005956 if (defaultValue != NULL)
5957 xmlFree(defaultValue);
5958 if (tree != NULL)
5959 xmlFreeEnumeration(tree);
5960 break;
5961 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005962 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5963 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005964
5965 GROW;
5966 if (RAW != '>') {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005967 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005968 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005969 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005970 if (defaultValue != NULL)
5971 xmlFree(defaultValue);
5972 if (tree != NULL)
5973 xmlFreeEnumeration(tree);
5974 break;
5975 }
Owen Taylor3473f882001-02-23 17:55:21 +00005976 }
Owen Taylor3473f882001-02-23 17:55:21 +00005977 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5978 (ctxt->sax->attributeDecl != NULL))
5979 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5980 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005981 else if (tree != NULL)
5982 xmlFreeEnumeration(tree);
5983
5984 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005985 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00005986 (def != XML_ATTRIBUTE_REQUIRED)) {
5987 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5988 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005989 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005990 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5991 }
Owen Taylor3473f882001-02-23 17:55:21 +00005992 if (defaultValue != NULL)
5993 xmlFree(defaultValue);
5994 GROW;
5995 }
5996 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005997 if (inputid != ctxt->input->id) {
5998 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5999 "Attribute list declaration doesn't start and"
6000 " stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006001 }
6002 NEXT;
6003 }
Owen Taylor3473f882001-02-23 17:55:21 +00006004 }
6005}
6006
6007/**
6008 * xmlParseElementMixedContentDecl:
6009 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006010 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006011 *
6012 * parse the declaration for a Mixed Element content
6013 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006014 *
Owen Taylor3473f882001-02-23 17:55:21 +00006015 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6016 * '(' S? '#PCDATA' S? ')'
6017 *
6018 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6019 *
6020 * [ VC: No Duplicate Types ]
6021 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006022 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006023 *
6024 * returns: the list of the xmlElementContentPtr describing the element choices
6025 */
6026xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006027xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006028 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006029 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006030
6031 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006032 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006033 SKIP(7);
6034 SKIP_BLANKS;
6035 SHRINK;
6036 if (RAW == ')') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006037 if (ctxt->input->id != inputchk) {
6038 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6039 "Element content declaration doesn't start and"
6040 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006041 }
Owen Taylor3473f882001-02-23 17:55:21 +00006042 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006043 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006044 if (ret == NULL)
6045 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006046 if (RAW == '*') {
6047 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6048 NEXT;
6049 }
6050 return(ret);
6051 }
6052 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006053 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006054 if (ret == NULL) return(NULL);
6055 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006056 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006057 NEXT;
6058 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006059 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006060 if (ret == NULL) return(NULL);
6061 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006062 if (cur != NULL)
6063 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006064 cur = ret;
6065 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006066 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006067 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006068 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006069 if (n->c1 != NULL)
6070 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006071 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006072 if (n != NULL)
6073 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006074 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006075 }
6076 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006077 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006078 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006079 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006080 "xmlParseElementMixedContentDecl : Name expected\n");
Nick Wellnhofer8627e4e2017-05-23 18:11:08 +02006081 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006082 return(NULL);
6083 }
6084 SKIP_BLANKS;
6085 GROW;
6086 }
6087 if ((RAW == ')') && (NXT(1) == '*')) {
6088 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006089 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006090 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006091 if (cur->c2 != NULL)
6092 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006093 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006094 if (ret != NULL)
6095 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006096 if (ctxt->input->id != inputchk) {
6097 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6098 "Element content declaration doesn't start and"
6099 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006100 }
Owen Taylor3473f882001-02-23 17:55:21 +00006101 SKIP(2);
6102 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006103 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006104 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006105 return(NULL);
6106 }
6107
6108 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006109 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006110 }
6111 return(ret);
6112}
6113
6114/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006115 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006116 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006117 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006118 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006119 *
6120 * parse the declaration for a Mixed Element content
6121 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006122 *
Owen Taylor3473f882001-02-23 17:55:21 +00006123 *
6124 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6125 *
6126 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6127 *
6128 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6129 *
6130 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6131 *
6132 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6133 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006134 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006135 * opening or closing parentheses in a choice, seq, or Mixed
6136 * construct is contained in the replacement text for a parameter
6137 * entity, both must be contained in the same replacement text. For
6138 * interoperability, if a parameter-entity reference appears in a
6139 * choice, seq, or Mixed construct, its replacement text should not
6140 * be empty, and neither the first nor last non-blank character of
6141 * the replacement text should be a connector (| or ,).
6142 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006143 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006144 * hierarchy.
6145 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006146static xmlElementContentPtr
6147xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6148 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006149 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006150 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006151 xmlChar type = 0;
6152
Daniel Veillard489f9672009-08-10 16:49:30 +02006153 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6154 (depth > 2048)) {
6155 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6156"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6157 depth);
6158 return(NULL);
6159 }
Owen Taylor3473f882001-02-23 17:55:21 +00006160 SKIP_BLANKS;
6161 GROW;
6162 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006163 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006164
Owen Taylor3473f882001-02-23 17:55:21 +00006165 /* Recurse on first child */
6166 NEXT;
6167 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006168 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6169 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006170 SKIP_BLANKS;
6171 GROW;
6172 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006173 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006174 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006175 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006176 return(NULL);
6177 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006178 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006179 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006180 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006181 return(NULL);
6182 }
Owen Taylor3473f882001-02-23 17:55:21 +00006183 GROW;
6184 if (RAW == '?') {
6185 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6186 NEXT;
6187 } else if (RAW == '*') {
6188 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6189 NEXT;
6190 } else if (RAW == '+') {
6191 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6192 NEXT;
6193 } else {
6194 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6195 }
Owen Taylor3473f882001-02-23 17:55:21 +00006196 GROW;
6197 }
6198 SKIP_BLANKS;
6199 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006200 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006201 /*
6202 * Each loop we parse one separator and one element.
6203 */
6204 if (RAW == ',') {
6205 if (type == 0) type = CUR;
6206
6207 /*
6208 * Detect "Name | Name , Name" error
6209 */
6210 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006211 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006212 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006213 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006214 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006215 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006216 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006217 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006218 return(NULL);
6219 }
6220 NEXT;
6221
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006222 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006223 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006224 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006225 xmlFreeDocElementContent(ctxt->myDoc, last);
6226 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006227 return(NULL);
6228 }
6229 if (last == NULL) {
6230 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006231 if (ret != NULL)
6232 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006233 ret = cur = op;
6234 } else {
6235 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006236 if (op != NULL)
6237 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006238 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006239 if (last != NULL)
6240 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006241 cur =op;
6242 last = NULL;
6243 }
6244 } else if (RAW == '|') {
6245 if (type == 0) type = CUR;
6246
6247 /*
6248 * Detect "Name , Name | Name" error
6249 */
6250 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006251 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006252 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006253 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006254 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006255 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006256 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006257 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006258 return(NULL);
6259 }
6260 NEXT;
6261
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006262 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006263 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006264 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006265 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006266 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006267 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006268 return(NULL);
6269 }
6270 if (last == NULL) {
6271 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006272 if (ret != NULL)
6273 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006274 ret = cur = op;
6275 } else {
6276 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006277 if (op != NULL)
6278 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006279 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006280 if (last != NULL)
6281 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006282 cur =op;
6283 last = NULL;
6284 }
6285 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006286 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006287 if ((last != NULL) && (last != ret))
6288 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006289 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006290 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006291 return(NULL);
6292 }
6293 GROW;
6294 SKIP_BLANKS;
6295 GROW;
6296 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006297 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006298 /* Recurse on second child */
6299 NEXT;
6300 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006301 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6302 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006303 SKIP_BLANKS;
6304 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006305 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006306 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006307 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006308 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006309 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006310 return(NULL);
6311 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006312 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006313 if (last == NULL) {
6314 if (ret != NULL)
6315 xmlFreeDocElementContent(ctxt->myDoc, ret);
6316 return(NULL);
6317 }
Owen Taylor3473f882001-02-23 17:55:21 +00006318 if (RAW == '?') {
6319 last->ocur = XML_ELEMENT_CONTENT_OPT;
6320 NEXT;
6321 } else if (RAW == '*') {
6322 last->ocur = XML_ELEMENT_CONTENT_MULT;
6323 NEXT;
6324 } else if (RAW == '+') {
6325 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6326 NEXT;
6327 } else {
6328 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6329 }
6330 }
6331 SKIP_BLANKS;
6332 GROW;
6333 }
6334 if ((cur != NULL) && (last != NULL)) {
6335 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006336 if (last != NULL)
6337 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006338 }
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006339 if (ctxt->input->id != inputchk) {
6340 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6341 "Element content declaration doesn't start and stop in"
6342 " the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006343 }
Owen Taylor3473f882001-02-23 17:55:21 +00006344 NEXT;
6345 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006346 if (ret != NULL) {
6347 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6348 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6349 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6350 else
6351 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6352 }
Owen Taylor3473f882001-02-23 17:55:21 +00006353 NEXT;
6354 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006355 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006356 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006357 cur = ret;
6358 /*
6359 * Some normalization:
6360 * (a | b* | c?)* == (a | b | c)*
6361 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006362 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006363 if ((cur->c1 != NULL) &&
6364 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6365 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6366 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6367 if ((cur->c2 != NULL) &&
6368 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6369 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6370 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6371 cur = cur->c2;
6372 }
6373 }
Owen Taylor3473f882001-02-23 17:55:21 +00006374 NEXT;
6375 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006376 if (ret != NULL) {
6377 int found = 0;
6378
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006379 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6380 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6381 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006382 else
6383 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006384 /*
6385 * Some normalization:
6386 * (a | b*)+ == (a | b)*
6387 * (a | b?)+ == (a | b)*
6388 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006389 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006390 if ((cur->c1 != NULL) &&
6391 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6392 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6393 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6394 found = 1;
6395 }
6396 if ((cur->c2 != NULL) &&
6397 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6398 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6399 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6400 found = 1;
6401 }
6402 cur = cur->c2;
6403 }
6404 if (found)
6405 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6406 }
Owen Taylor3473f882001-02-23 17:55:21 +00006407 NEXT;
6408 }
6409 return(ret);
6410}
6411
6412/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006413 * xmlParseElementChildrenContentDecl:
6414 * @ctxt: an XML parser context
6415 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006416 *
6417 * parse the declaration for a Mixed Element content
6418 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6419 *
6420 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6421 *
6422 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6423 *
6424 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6425 *
6426 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6427 *
6428 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6429 * TODO Parameter-entity replacement text must be properly nested
6430 * with parenthesized groups. That is to say, if either of the
6431 * opening or closing parentheses in a choice, seq, or Mixed
6432 * construct is contained in the replacement text for a parameter
6433 * entity, both must be contained in the same replacement text. For
6434 * interoperability, if a parameter-entity reference appears in a
6435 * choice, seq, or Mixed construct, its replacement text should not
6436 * be empty, and neither the first nor last non-blank character of
6437 * the replacement text should be a connector (| or ,).
6438 *
6439 * Returns the tree of xmlElementContentPtr describing the element
6440 * hierarchy.
6441 */
6442xmlElementContentPtr
6443xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6444 /* stub left for API/ABI compat */
6445 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6446}
6447
6448/**
Owen Taylor3473f882001-02-23 17:55:21 +00006449 * xmlParseElementContentDecl:
6450 * @ctxt: an XML parser context
6451 * @name: the name of the element being defined.
6452 * @result: the Element Content pointer will be stored here if any
6453 *
6454 * parse the declaration for an Element content either Mixed or Children,
6455 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006456 *
Owen Taylor3473f882001-02-23 17:55:21 +00006457 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6458 *
6459 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6460 */
6461
6462int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006463xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006464 xmlElementContentPtr *result) {
6465
6466 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006467 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006468 int res;
6469
6470 *result = NULL;
6471
6472 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006473 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006474 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006475 return(-1);
6476 }
6477 NEXT;
6478 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006479 if (ctxt->instate == XML_PARSER_EOF)
6480 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006481 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006482 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006483 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006484 res = XML_ELEMENT_TYPE_MIXED;
6485 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006486 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006487 res = XML_ELEMENT_TYPE_ELEMENT;
6488 }
Owen Taylor3473f882001-02-23 17:55:21 +00006489 SKIP_BLANKS;
6490 *result = tree;
6491 return(res);
6492}
6493
6494/**
6495 * xmlParseElementDecl:
6496 * @ctxt: an XML parser context
6497 *
6498 * parse an Element declaration.
6499 *
6500 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6501 *
6502 * [ VC: Unique Element Type Declaration ]
6503 * No element type may be declared more than once
6504 *
6505 * Returns the type of the element, or -1 in case of error
6506 */
6507int
6508xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006509 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006510 int ret = -1;
6511 xmlElementContentPtr content = NULL;
6512
Daniel Veillard4c778d82005-01-23 17:37:44 +00006513 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006514 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006515 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006516
6517 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006518 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006519 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6520 "Space required after 'ELEMENT'\n");
David Kilzer00906752016-01-26 16:57:03 -08006521 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006522 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00006523 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006524 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006525 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6526 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006527 return(-1);
6528 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006529 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6531 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006532 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00006533 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006534 SKIP(5);
6535 /*
6536 * Element must always be empty.
6537 */
6538 ret = XML_ELEMENT_TYPE_EMPTY;
6539 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6540 (NXT(2) == 'Y')) {
6541 SKIP(3);
6542 /*
6543 * Element is a generic container.
6544 */
6545 ret = XML_ELEMENT_TYPE_ANY;
6546 } else if (RAW == '(') {
6547 ret = xmlParseElementContentDecl(ctxt, name, &content);
6548 } else {
6549 /*
6550 * [ WFC: PEs in Internal Subset ] error handling.
6551 */
6552 if ((RAW == '%') && (ctxt->external == 0) &&
6553 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006554 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006555 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006556 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006557 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006558 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6559 }
Owen Taylor3473f882001-02-23 17:55:21 +00006560 return(-1);
6561 }
6562
6563 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006564
6565 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006566 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006567 if (content != NULL) {
6568 xmlFreeDocElementContent(ctxt->myDoc, content);
6569 }
Owen Taylor3473f882001-02-23 17:55:21 +00006570 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006571 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006572 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006573 "Element declaration doesn't start and stop in"
6574 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006575 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006576
Owen Taylor3473f882001-02-23 17:55:21 +00006577 NEXT;
6578 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006579 (ctxt->sax->elementDecl != NULL)) {
6580 if (content != NULL)
6581 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006582 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6583 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006584 if ((content != NULL) && (content->parent == NULL)) {
6585 /*
6586 * this is a trick: if xmlAddElementDecl is called,
6587 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006588 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006589 * interfaces or change the API/ABI
6590 */
6591 xmlFreeDocElementContent(ctxt->myDoc, content);
6592 }
6593 } else if (content != NULL) {
6594 xmlFreeDocElementContent(ctxt->myDoc, content);
6595 }
Owen Taylor3473f882001-02-23 17:55:21 +00006596 }
Owen Taylor3473f882001-02-23 17:55:21 +00006597 }
6598 return(ret);
6599}
6600
6601/**
Owen Taylor3473f882001-02-23 17:55:21 +00006602 * xmlParseConditionalSections
6603 * @ctxt: an XML parser context
6604 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006605 * [61] conditionalSect ::= includeSect | ignoreSect
6606 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006607 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6608 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6609 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6610 */
6611
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006612static void
Owen Taylor3473f882001-02-23 17:55:21 +00006613xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006614 int id = ctxt->input->id;
6615
Owen Taylor3473f882001-02-23 17:55:21 +00006616 SKIP(3);
6617 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006618 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006619 SKIP(7);
6620 SKIP_BLANKS;
6621 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006622 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006623 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006624 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006625 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006626 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006627 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6628 "All markup of the conditional section is not"
6629 " in the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006630 }
Owen Taylor3473f882001-02-23 17:55:21 +00006631 NEXT;
6632 }
6633 if (xmlParserDebugEntities) {
6634 if ((ctxt->input != NULL) && (ctxt->input->filename))
6635 xmlGenericError(xmlGenericErrorContext,
6636 "%s(%d): ", ctxt->input->filename,
6637 ctxt->input->line);
6638 xmlGenericError(xmlGenericErrorContext,
6639 "Entering INCLUDE Conditional Section\n");
6640 }
6641
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006642 SKIP_BLANKS;
6643 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006644 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6645 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006646 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006647 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006648
6649 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6650 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006651 } else
6652 xmlParseMarkupDecl(ctxt);
6653
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006654 SKIP_BLANKS;
6655 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006656
Daniel Veillardfdc91562002-07-01 21:52:03 +00006657 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006658 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
David Kilzer00906752016-01-26 16:57:03 -08006659 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006660 break;
6661 }
6662 }
6663 if (xmlParserDebugEntities) {
6664 if ((ctxt->input != NULL) && (ctxt->input->filename))
6665 xmlGenericError(xmlGenericErrorContext,
6666 "%s(%d): ", ctxt->input->filename,
6667 ctxt->input->line);
6668 xmlGenericError(xmlGenericErrorContext,
6669 "Leaving INCLUDE Conditional Section\n");
6670 }
6671
Daniel Veillarda07050d2003-10-19 14:46:32 +00006672 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006673 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006674 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006675 int depth = 0;
6676
6677 SKIP(6);
6678 SKIP_BLANKS;
6679 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006680 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006681 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006682 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006683 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006684 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006685 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6686 "All markup of the conditional section is not"
6687 " in the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006688 }
Owen Taylor3473f882001-02-23 17:55:21 +00006689 NEXT;
6690 }
6691 if (xmlParserDebugEntities) {
6692 if ((ctxt->input != NULL) && (ctxt->input->filename))
6693 xmlGenericError(xmlGenericErrorContext,
6694 "%s(%d): ", ctxt->input->filename,
6695 ctxt->input->line);
6696 xmlGenericError(xmlGenericErrorContext,
6697 "Entering IGNORE Conditional Section\n");
6698 }
6699
6700 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006701 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006702 * But disable SAX event generating DTD building in the meantime
6703 */
6704 state = ctxt->disableSAX;
6705 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006706 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006707 ctxt->instate = XML_PARSER_IGNORE;
6708
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006709 while (((depth >= 0) && (RAW != 0)) &&
6710 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006711 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6712 depth++;
6713 SKIP(3);
6714 continue;
6715 }
6716 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6717 if (--depth >= 0) SKIP(3);
6718 continue;
6719 }
6720 NEXT;
6721 continue;
6722 }
6723
6724 ctxt->disableSAX = state;
6725 ctxt->instate = instate;
6726
6727 if (xmlParserDebugEntities) {
6728 if ((ctxt->input != NULL) && (ctxt->input->filename))
6729 xmlGenericError(xmlGenericErrorContext,
6730 "%s(%d): ", ctxt->input->filename,
6731 ctxt->input->line);
6732 xmlGenericError(xmlGenericErrorContext,
6733 "Leaving IGNORE Conditional Section\n");
6734 }
6735
6736 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006737 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006738 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006739 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006740 }
6741
6742 if (RAW == 0)
6743 SHRINK;
6744
6745 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006746 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006747 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006748 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006749 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6750 "All markup of the conditional section is not in"
6751 " the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006752 }
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006753 if ((ctxt-> instate != XML_PARSER_EOF) &&
Daniel Veillard41ac9042015-10-27 10:53:44 +08006754 ((ctxt->input->cur + 3) <= ctxt->input->end))
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006755 SKIP(3);
Owen Taylor3473f882001-02-23 17:55:21 +00006756 }
6757}
6758
6759/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006760 * xmlParseMarkupDecl:
6761 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006762 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006763 * parse Markup declarations
6764 *
6765 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6766 * NotationDecl | PI | Comment
6767 *
6768 * [ VC: Proper Declaration/PE Nesting ]
6769 * Parameter-entity replacement text must be properly nested with
6770 * markup declarations. That is to say, if either the first character
6771 * or the last character of a markup declaration (markupdecl above) is
6772 * contained in the replacement text for a parameter-entity reference,
6773 * both must be contained in the same replacement text.
6774 *
6775 * [ WFC: PEs in Internal Subset ]
6776 * In the internal DTD subset, parameter-entity references can occur
6777 * only where markup declarations can occur, not within markup declarations.
6778 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006779 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006780 */
6781void
6782xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6783 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006784 if (CUR == '<') {
6785 if (NXT(1) == '!') {
6786 switch (NXT(2)) {
6787 case 'E':
6788 if (NXT(3) == 'L')
6789 xmlParseElementDecl(ctxt);
6790 else if (NXT(3) == 'N')
6791 xmlParseEntityDecl(ctxt);
6792 break;
6793 case 'A':
6794 xmlParseAttributeListDecl(ctxt);
6795 break;
6796 case 'N':
6797 xmlParseNotationDecl(ctxt);
6798 break;
6799 case '-':
6800 xmlParseComment(ctxt);
6801 break;
6802 default:
6803 /* there is an error but it will be detected later */
6804 break;
6805 }
6806 } else if (NXT(1) == '?') {
6807 xmlParsePI(ctxt);
6808 }
6809 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006810
6811 /*
6812 * detect requirement to exit there and act accordingly
6813 * and avoid having instate overriden later on
6814 */
6815 if (ctxt->instate == XML_PARSER_EOF)
6816 return;
6817
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006818 /*
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006819 * Conditional sections are allowed from entities included
6820 * by PE References in the internal subset.
6821 */
6822 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6823 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6824 xmlParseConditionalSections(ctxt);
6825 }
6826 }
6827
6828 ctxt->instate = XML_PARSER_DTD;
6829}
6830
6831/**
6832 * xmlParseTextDecl:
6833 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006834 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006835 * parse an XML declaration header for external entities
6836 *
6837 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006838 */
6839
6840void
6841xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6842 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006843 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006844
6845 /*
6846 * We know that '<?xml' is here.
6847 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006848 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006849 SKIP(5);
6850 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006851 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006852 return;
6853 }
6854
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006855 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006856 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6857 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006858 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006859
6860 /*
6861 * We may have the VersionInfo here.
6862 */
6863 version = xmlParseVersionInfo(ctxt);
6864 if (version == NULL)
6865 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006866 else {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006867 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006868 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6869 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006870 }
6871 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006872 ctxt->input->version = version;
6873
6874 /*
6875 * We must have the encoding declaration
6876 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006877 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006878 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6879 /*
6880 * The XML REC instructs us to stop parsing right here
6881 */
6882 return;
6883 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006884 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6885 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6886 "Missing encoding in text declaration\n");
6887 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006888
6889 SKIP_BLANKS;
6890 if ((RAW == '?') && (NXT(1) == '>')) {
6891 SKIP(2);
6892 } else if (RAW == '>') {
6893 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006894 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006895 NEXT;
6896 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006897 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006898 MOVETO_ENDTAG(CUR_PTR);
6899 NEXT;
6900 }
6901}
6902
6903/**
Owen Taylor3473f882001-02-23 17:55:21 +00006904 * xmlParseExternalSubset:
6905 * @ctxt: an XML parser context
6906 * @ExternalID: the external identifier
6907 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006908 *
Owen Taylor3473f882001-02-23 17:55:21 +00006909 * parse Markup declarations from an external subset
6910 *
6911 * [30] extSubset ::= textDecl? extSubsetDecl
6912 *
6913 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6914 */
6915void
6916xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6917 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006918 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006919 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006920
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006921 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006922 (ctxt->input->end - ctxt->input->cur >= 4)) {
6923 xmlChar start[4];
6924 xmlCharEncoding enc;
6925
6926 start[0] = RAW;
6927 start[1] = NXT(1);
6928 start[2] = NXT(2);
6929 start[3] = NXT(3);
6930 enc = xmlDetectCharEncoding(start, 4);
6931 if (enc != XML_CHAR_ENCODING_NONE)
6932 xmlSwitchEncoding(ctxt, enc);
6933 }
6934
Daniel Veillarda07050d2003-10-19 14:46:32 +00006935 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006936 xmlParseTextDecl(ctxt);
6937 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6938 /*
6939 * The XML REC instructs us to stop parsing right here
6940 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08006941 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006942 return;
6943 }
6944 }
6945 if (ctxt->myDoc == NULL) {
6946 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006947 if (ctxt->myDoc == NULL) {
6948 xmlErrMemory(ctxt, "New Doc failed");
6949 return;
6950 }
6951 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006952 }
6953 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6954 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6955
6956 ctxt->instate = XML_PARSER_DTD;
6957 ctxt->external = 1;
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006958 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006959 while (((RAW == '<') && (NXT(1) == '?')) ||
6960 ((RAW == '<') && (NXT(1) == '!')) ||
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006961 (RAW == '%')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006962 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006963 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006964
6965 GROW;
6966 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6967 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006968 } else
6969 xmlParseMarkupDecl(ctxt);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006970 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006971
Daniel Veillardfdc91562002-07-01 21:52:03 +00006972 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006973 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006974 break;
6975 }
6976 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006977
Owen Taylor3473f882001-02-23 17:55:21 +00006978 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006979 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006980 }
6981
6982}
6983
6984/**
6985 * xmlParseReference:
6986 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006987 *
Owen Taylor3473f882001-02-23 17:55:21 +00006988 * parse and handle entity references in content, depending on the SAX
6989 * interface, this may end-up in a call to character() if this is a
6990 * CharRef, a predefined entity, if there is no reference() callback.
6991 * or if the parser was asked to switch to that mode.
6992 *
6993 * [67] Reference ::= EntityRef | CharRef
6994 */
6995void
6996xmlParseReference(xmlParserCtxtPtr ctxt) {
6997 xmlEntityPtr ent;
6998 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006999 int was_checked;
7000 xmlNodePtr list = NULL;
7001 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007002
Daniel Veillard0161e632008-08-28 15:36:32 +00007003
7004 if (RAW != '&')
7005 return;
7006
7007 /*
7008 * Simple case of a CharRef
7009 */
Owen Taylor3473f882001-02-23 17:55:21 +00007010 if (NXT(1) == '#') {
7011 int i = 0;
7012 xmlChar out[10];
7013 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007014 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007015
Daniel Veillarddc171602008-03-26 17:41:38 +00007016 if (value == 0)
7017 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007018 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7019 /*
7020 * So we are using non-UTF-8 buffers
7021 * Check that the char fit on 8bits, if not
7022 * generate a CharRef.
7023 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007024 if (value <= 0xFF) {
7025 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007026 out[1] = 0;
7027 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7028 (!ctxt->disableSAX))
7029 ctxt->sax->characters(ctxt->userData, out, 1);
7030 } else {
7031 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007032 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007033 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007034 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007035 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7036 (!ctxt->disableSAX))
7037 ctxt->sax->reference(ctxt->userData, out);
7038 }
7039 } else {
7040 /*
7041 * Just encode the value in UTF-8
7042 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007043 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007044 out[i] = 0;
7045 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7046 (!ctxt->disableSAX))
7047 ctxt->sax->characters(ctxt->userData, out, i);
7048 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007049 return;
7050 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007051
Daniel Veillard0161e632008-08-28 15:36:32 +00007052 /*
7053 * We are seeing an entity reference
7054 */
7055 ent = xmlParseEntityRef(ctxt);
7056 if (ent == NULL) return;
7057 if (!ctxt->wellFormed)
7058 return;
7059 was_checked = ent->checked;
7060
7061 /* special case of predefined entities */
7062 if ((ent->name == NULL) ||
7063 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7064 val = ent->content;
7065 if (val == NULL) return;
7066 /*
7067 * inline the entity.
7068 */
7069 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7070 (!ctxt->disableSAX))
7071 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7072 return;
7073 }
7074
7075 /*
7076 * The first reference to the entity trigger a parsing phase
7077 * where the ent->children is filled with the result from
7078 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007079 * Note: external parsed entities will not be loaded, it is not
7080 * required for a non-validating parser, unless the parsing option
7081 * of validating, or substituting entities were given. Doing so is
7082 * far more secure as the parser will only process data coming from
7083 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007084 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007085 if (((ent->checked == 0) ||
7086 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007087 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7088 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007089 unsigned long oldnbent = ctxt->nbentities;
7090
7091 /*
7092 * This is a bit hackish but this seems the best
7093 * way to make sure both SAX and DOM entity support
7094 * behaves okay.
7095 */
7096 void *user_data;
7097 if (ctxt->userData == ctxt)
7098 user_data = NULL;
7099 else
7100 user_data = ctxt->userData;
7101
7102 /*
7103 * Check that this entity is well formed
7104 * 4.3.2: An internal general parsed entity is well-formed
7105 * if its replacement text matches the production labeled
7106 * content.
7107 */
7108 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7109 ctxt->depth++;
7110 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7111 user_data, &list);
7112 ctxt->depth--;
7113
7114 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7115 ctxt->depth++;
7116 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7117 user_data, ctxt->depth, ent->URI,
7118 ent->ExternalID, &list);
7119 ctxt->depth--;
7120 } else {
7121 ret = XML_ERR_ENTITY_PE_INTERNAL;
7122 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7123 "invalid entity type found\n", NULL);
7124 }
7125
7126 /*
7127 * Store the number of entities needing parsing for this entity
7128 * content and do checkings
7129 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007130 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7131 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7132 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007133 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007134 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007135 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007136 return;
7137 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007138 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007139 xmlFreeNodeList(list);
7140 return;
7141 }
Owen Taylor3473f882001-02-23 17:55:21 +00007142
Daniel Veillard0161e632008-08-28 15:36:32 +00007143 if ((ret == XML_ERR_OK) && (list != NULL)) {
7144 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7145 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7146 (ent->children == NULL)) {
7147 ent->children = list;
7148 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007149 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007150 * Prune it directly in the generated document
7151 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007152 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007153 if (((list->type == XML_TEXT_NODE) &&
7154 (list->next == NULL)) ||
7155 (ctxt->parseMode == XML_PARSE_READER)) {
7156 list->parent = (xmlNodePtr) ent;
7157 list = NULL;
7158 ent->owner = 1;
7159 } else {
7160 ent->owner = 0;
7161 while (list != NULL) {
7162 list->parent = (xmlNodePtr) ctxt->node;
7163 list->doc = ctxt->myDoc;
7164 if (list->next == NULL)
7165 ent->last = list;
7166 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007167 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007168 list = ent->children;
7169#ifdef LIBXML_LEGACY_ENABLED
7170 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7171 xmlAddEntityReference(ent, list, NULL);
7172#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007173 }
7174 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007175 ent->owner = 1;
7176 while (list != NULL) {
7177 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007178 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007179 if (list->next == NULL)
7180 ent->last = list;
7181 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007182 }
7183 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007184 } else {
7185 xmlFreeNodeList(list);
7186 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007187 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007188 } else if ((ret != XML_ERR_OK) &&
7189 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7190 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7191 "Entity '%s' failed to parse\n", ent->name);
Nick Wellnhofer60dded12018-01-22 15:04:58 +01007192 if (ent->content != NULL)
7193 ent->content[0] = 0;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007194 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007195 } else if (list != NULL) {
7196 xmlFreeNodeList(list);
7197 list = NULL;
7198 }
7199 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007200 ent->checked = 2;
David Kilzer3f0627a2017-06-16 21:30:42 +02007201
7202 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7203 was_checked = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007204 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007205 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007206 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007207
Daniel Veillard0161e632008-08-28 15:36:32 +00007208 /*
7209 * Now that the entity content has been gathered
7210 * provide it to the application, this can take different forms based
7211 * on the parsing modes.
7212 */
7213 if (ent->children == NULL) {
7214 /*
7215 * Probably running in SAX mode and the callbacks don't
7216 * build the entity content. So unless we already went
7217 * though parsing for first checking go though the entity
7218 * content to generate callbacks associated to the entity
7219 */
7220 if (was_checked != 0) {
7221 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007222 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007223 * This is a bit hackish but this seems the best
7224 * way to make sure both SAX and DOM entity support
7225 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007226 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007227 if (ctxt->userData == ctxt)
7228 user_data = NULL;
7229 else
7230 user_data = ctxt->userData;
7231
7232 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7233 ctxt->depth++;
7234 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7235 ent->content, user_data, NULL);
7236 ctxt->depth--;
7237 } else if (ent->etype ==
7238 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7239 ctxt->depth++;
7240 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7241 ctxt->sax, user_data, ctxt->depth,
7242 ent->URI, ent->ExternalID, NULL);
7243 ctxt->depth--;
7244 } else {
7245 ret = XML_ERR_ENTITY_PE_INTERNAL;
7246 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7247 "invalid entity type found\n", NULL);
7248 }
7249 if (ret == XML_ERR_ENTITY_LOOP) {
7250 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7251 return;
7252 }
7253 }
7254 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7255 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7256 /*
7257 * Entity reference callback comes second, it's somewhat
7258 * superfluous but a compatibility to historical behaviour
7259 */
7260 ctxt->sax->reference(ctxt->userData, ent->name);
7261 }
7262 return;
7263 }
7264
7265 /*
7266 * If we didn't get any children for the entity being built
7267 */
7268 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7269 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7270 /*
7271 * Create a node.
7272 */
7273 ctxt->sax->reference(ctxt->userData, ent->name);
7274 return;
7275 }
7276
7277 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7278 /*
7279 * There is a problem on the handling of _private for entities
7280 * (bug 155816): Should we copy the content of the field from
7281 * the entity (possibly overwriting some value set by the user
7282 * when a copy is created), should we leave it alone, or should
7283 * we try to take care of different situations? The problem
7284 * is exacerbated by the usage of this field by the xmlReader.
7285 * To fix this bug, we look at _private on the created node
7286 * and, if it's NULL, we copy in whatever was in the entity.
7287 * If it's not NULL we leave it alone. This is somewhat of a
7288 * hack - maybe we should have further tests to determine
7289 * what to do.
7290 */
7291 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7292 /*
7293 * Seems we are generating the DOM content, do
7294 * a simple tree copy for all references except the first
7295 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007296 */
7297 if (((list == NULL) && (ent->owner == 0)) ||
7298 (ctxt->parseMode == XML_PARSE_READER)) {
7299 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7300
7301 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007302 * We are copying here, make sure there is no abuse
7303 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007304 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007305 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7306 return;
7307
7308 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007309 * when operating on a reader, the entities definitions
7310 * are always owning the entities subtree.
7311 if (ctxt->parseMode == XML_PARSE_READER)
7312 ent->owner = 1;
7313 */
7314
7315 cur = ent->children;
7316 while (cur != NULL) {
7317 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7318 if (nw != NULL) {
7319 if (nw->_private == NULL)
7320 nw->_private = cur->_private;
7321 if (firstChild == NULL){
7322 firstChild = nw;
7323 }
7324 nw = xmlAddChild(ctxt->node, nw);
7325 }
7326 if (cur == ent->last) {
7327 /*
7328 * needed to detect some strange empty
7329 * node cases in the reader tests
7330 */
7331 if ((ctxt->parseMode == XML_PARSE_READER) &&
7332 (nw != NULL) &&
7333 (nw->type == XML_ELEMENT_NODE) &&
7334 (nw->children == NULL))
7335 nw->extra = 1;
7336
7337 break;
7338 }
7339 cur = cur->next;
7340 }
7341#ifdef LIBXML_LEGACY_ENABLED
7342 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7343 xmlAddEntityReference(ent, firstChild, nw);
7344#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007345 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007346 xmlNodePtr nw = NULL, cur, next, last,
7347 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007348
7349 /*
7350 * We are copying here, make sure there is no abuse
7351 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007352 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007353 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7354 return;
7355
Daniel Veillard0161e632008-08-28 15:36:32 +00007356 /*
7357 * Copy the entity child list and make it the new
7358 * entity child list. The goal is to make sure any
7359 * ID or REF referenced will be the one from the
7360 * document content and not the entity copy.
7361 */
7362 cur = ent->children;
7363 ent->children = NULL;
7364 last = ent->last;
7365 ent->last = NULL;
7366 while (cur != NULL) {
7367 next = cur->next;
7368 cur->next = NULL;
7369 cur->parent = NULL;
7370 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7371 if (nw != NULL) {
7372 if (nw->_private == NULL)
7373 nw->_private = cur->_private;
7374 if (firstChild == NULL){
7375 firstChild = cur;
7376 }
7377 xmlAddChild((xmlNodePtr) ent, nw);
7378 xmlAddChild(ctxt->node, cur);
7379 }
7380 if (cur == last)
7381 break;
7382 cur = next;
7383 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007384 if (ent->owner == 0)
7385 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007386#ifdef LIBXML_LEGACY_ENABLED
7387 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7388 xmlAddEntityReference(ent, firstChild, nw);
7389#endif /* LIBXML_LEGACY_ENABLED */
7390 } else {
7391 const xmlChar *nbktext;
7392
7393 /*
7394 * the name change is to avoid coalescing of the
7395 * node with a possible previous text one which
7396 * would make ent->children a dangling pointer
7397 */
7398 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7399 -1);
7400 if (ent->children->type == XML_TEXT_NODE)
7401 ent->children->name = nbktext;
7402 if ((ent->last != ent->children) &&
7403 (ent->last->type == XML_TEXT_NODE))
7404 ent->last->name = nbktext;
7405 xmlAddChildList(ctxt->node, ent->children);
7406 }
7407
7408 /*
7409 * This is to avoid a nasty side effect, see
7410 * characters() in SAX.c
7411 */
7412 ctxt->nodemem = 0;
7413 ctxt->nodelen = 0;
7414 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007415 }
7416 }
7417}
7418
7419/**
7420 * xmlParseEntityRef:
7421 * @ctxt: an XML parser context
7422 *
7423 * parse ENTITY references declarations
7424 *
7425 * [68] EntityRef ::= '&' Name ';'
7426 *
7427 * [ WFC: Entity Declared ]
7428 * In a document without any DTD, a document with only an internal DTD
7429 * subset which contains no parameter entity references, or a document
7430 * with "standalone='yes'", the Name given in the entity reference
7431 * must match that in an entity declaration, except that well-formed
7432 * documents need not declare any of the following entities: amp, lt,
7433 * gt, apos, quot. The declaration of a parameter entity must precede
7434 * any reference to it. Similarly, the declaration of a general entity
7435 * must precede any reference to it which appears in a default value in an
7436 * attribute-list declaration. Note that if entities are declared in the
7437 * external subset or in external parameter entities, a non-validating
7438 * processor is not obligated to read and process their declarations;
7439 * for such documents, the rule that an entity must be declared is a
7440 * well-formedness constraint only if standalone='yes'.
7441 *
7442 * [ WFC: Parsed Entity ]
7443 * An entity reference must not contain the name of an unparsed entity
7444 *
7445 * Returns the xmlEntityPtr if found, or NULL otherwise.
7446 */
7447xmlEntityPtr
7448xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007449 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007450 xmlEntityPtr ent = NULL;
7451
7452 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007453 if (ctxt->instate == XML_PARSER_EOF)
7454 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007455
Daniel Veillard0161e632008-08-28 15:36:32 +00007456 if (RAW != '&')
7457 return(NULL);
7458 NEXT;
7459 name = xmlParseName(ctxt);
7460 if (name == NULL) {
7461 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7462 "xmlParseEntityRef: no name\n");
7463 return(NULL);
7464 }
7465 if (RAW != ';') {
7466 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7467 return(NULL);
7468 }
7469 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007470
Daniel Veillard0161e632008-08-28 15:36:32 +00007471 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007472 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007473 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007474 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7475 ent = xmlGetPredefinedEntity(name);
7476 if (ent != NULL)
7477 return(ent);
7478 }
Owen Taylor3473f882001-02-23 17:55:21 +00007479
Daniel Veillard0161e632008-08-28 15:36:32 +00007480 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007481 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007482 */
7483 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007484
Daniel Veillard0161e632008-08-28 15:36:32 +00007485 /*
7486 * Ask first SAX for entity resolution, otherwise try the
7487 * entities which may have stored in the parser context.
7488 */
7489 if (ctxt->sax != NULL) {
7490 if (ctxt->sax->getEntity != NULL)
7491 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007492 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007493 (ctxt->options & XML_PARSE_OLDSAX))
7494 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007495 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7496 (ctxt->userData==ctxt)) {
7497 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007498 }
7499 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007500 if (ctxt->instate == XML_PARSER_EOF)
7501 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007502 /*
7503 * [ WFC: Entity Declared ]
7504 * In a document without any DTD, a document with only an
7505 * internal DTD subset which contains no parameter entity
7506 * references, or a document with "standalone='yes'", the
7507 * Name given in the entity reference must match that in an
7508 * entity declaration, except that well-formed documents
7509 * need not declare any of the following entities: amp, lt,
7510 * gt, apos, quot.
7511 * The declaration of a parameter entity must precede any
7512 * reference to it.
7513 * Similarly, the declaration of a general entity must
7514 * precede any reference to it which appears in a default
7515 * value in an attribute-list declaration. Note that if
7516 * entities are declared in the external subset or in
7517 * external parameter entities, a non-validating processor
7518 * is not obligated to read and process their declarations;
7519 * for such documents, the rule that an entity must be
7520 * declared is a well-formedness constraint only if
7521 * standalone='yes'.
7522 */
7523 if (ent == NULL) {
7524 if ((ctxt->standalone == 1) ||
7525 ((ctxt->hasExternalSubset == 0) &&
7526 (ctxt->hasPErefs == 0))) {
7527 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7528 "Entity '%s' not defined\n", name);
7529 } else {
7530 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7531 "Entity '%s' not defined\n", name);
7532 if ((ctxt->inSubset == 0) &&
7533 (ctxt->sax != NULL) &&
7534 (ctxt->sax->reference != NULL)) {
7535 ctxt->sax->reference(ctxt->userData, name);
7536 }
7537 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007538 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007539 ctxt->valid = 0;
7540 }
7541
7542 /*
7543 * [ WFC: Parsed Entity ]
7544 * An entity reference must not contain the name of an
7545 * unparsed entity
7546 */
7547 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7548 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7549 "Entity reference to unparsed entity %s\n", name);
7550 }
7551
7552 /*
7553 * [ WFC: No External Entity References ]
7554 * Attribute values cannot contain direct or indirect
7555 * entity references to external entities.
7556 */
7557 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7558 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7559 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7560 "Attribute references external entity '%s'\n", name);
7561 }
7562 /*
7563 * [ WFC: No < in Attribute Values ]
7564 * The replacement text of any entity referred to directly or
7565 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007566 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007567 */
7568 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007569 (ent != NULL) &&
7570 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007571 if (((ent->checked & 1) || (ent->checked == 0)) &&
7572 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007573 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7574 "'<' in entity '%s' is not allowed in attributes values\n", name);
7575 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007576 }
7577
7578 /*
7579 * Internal check, no parameter entities here ...
7580 */
7581 else {
7582 switch (ent->etype) {
7583 case XML_INTERNAL_PARAMETER_ENTITY:
7584 case XML_EXTERNAL_PARAMETER_ENTITY:
7585 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7586 "Attempt to reference the parameter entity '%s'\n",
7587 name);
7588 break;
7589 default:
7590 break;
7591 }
7592 }
7593
7594 /*
7595 * [ WFC: No Recursion ]
7596 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007597 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007598 * Done somewhere else
7599 */
Owen Taylor3473f882001-02-23 17:55:21 +00007600 return(ent);
7601}
7602
7603/**
7604 * xmlParseStringEntityRef:
7605 * @ctxt: an XML parser context
7606 * @str: a pointer to an index in the string
7607 *
7608 * parse ENTITY references declarations, but this version parses it from
7609 * a string value.
7610 *
7611 * [68] EntityRef ::= '&' Name ';'
7612 *
7613 * [ WFC: Entity Declared ]
7614 * In a document without any DTD, a document with only an internal DTD
7615 * subset which contains no parameter entity references, or a document
7616 * with "standalone='yes'", the Name given in the entity reference
7617 * must match that in an entity declaration, except that well-formed
7618 * documents need not declare any of the following entities: amp, lt,
7619 * gt, apos, quot. The declaration of a parameter entity must precede
7620 * any reference to it. Similarly, the declaration of a general entity
7621 * must precede any reference to it which appears in a default value in an
7622 * attribute-list declaration. Note that if entities are declared in the
7623 * external subset or in external parameter entities, a non-validating
7624 * processor is not obligated to read and process their declarations;
7625 * for such documents, the rule that an entity must be declared is a
7626 * well-formedness constraint only if standalone='yes'.
7627 *
7628 * [ WFC: Parsed Entity ]
7629 * An entity reference must not contain the name of an unparsed entity
7630 *
7631 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7632 * is updated to the current location in the string.
7633 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007634static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007635xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7636 xmlChar *name;
7637 const xmlChar *ptr;
7638 xmlChar cur;
7639 xmlEntityPtr ent = NULL;
7640
7641 if ((str == NULL) || (*str == NULL))
7642 return(NULL);
7643 ptr = *str;
7644 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007645 if (cur != '&')
7646 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007647
Daniel Veillard0161e632008-08-28 15:36:32 +00007648 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007649 name = xmlParseStringName(ctxt, &ptr);
7650 if (name == NULL) {
7651 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7652 "xmlParseStringEntityRef: no name\n");
7653 *str = ptr;
7654 return(NULL);
7655 }
7656 if (*ptr != ';') {
7657 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007658 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007659 *str = ptr;
7660 return(NULL);
7661 }
7662 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007663
Owen Taylor3473f882001-02-23 17:55:21 +00007664
Daniel Veillard0161e632008-08-28 15:36:32 +00007665 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007666 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007667 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007668 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7669 ent = xmlGetPredefinedEntity(name);
7670 if (ent != NULL) {
7671 xmlFree(name);
7672 *str = ptr;
7673 return(ent);
7674 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007675 }
Owen Taylor3473f882001-02-23 17:55:21 +00007676
Daniel Veillard0161e632008-08-28 15:36:32 +00007677 /*
7678 * Increate the number of entity references parsed
7679 */
7680 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007681
Daniel Veillard0161e632008-08-28 15:36:32 +00007682 /*
7683 * Ask first SAX for entity resolution, otherwise try the
7684 * entities which may have stored in the parser context.
7685 */
7686 if (ctxt->sax != NULL) {
7687 if (ctxt->sax->getEntity != NULL)
7688 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007689 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7690 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007691 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7692 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007693 }
7694 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007695 if (ctxt->instate == XML_PARSER_EOF) {
7696 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007697 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007698 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007699
7700 /*
7701 * [ WFC: Entity Declared ]
7702 * In a document without any DTD, a document with only an
7703 * internal DTD subset which contains no parameter entity
7704 * references, or a document with "standalone='yes'", the
7705 * Name given in the entity reference must match that in an
7706 * entity declaration, except that well-formed documents
7707 * need not declare any of the following entities: amp, lt,
7708 * gt, apos, quot.
7709 * The declaration of a parameter entity must precede any
7710 * reference to it.
7711 * Similarly, the declaration of a general entity must
7712 * precede any reference to it which appears in a default
7713 * value in an attribute-list declaration. Note that if
7714 * entities are declared in the external subset or in
7715 * external parameter entities, a non-validating processor
7716 * is not obligated to read and process their declarations;
7717 * for such documents, the rule that an entity must be
7718 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007719 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007720 */
7721 if (ent == NULL) {
7722 if ((ctxt->standalone == 1) ||
7723 ((ctxt->hasExternalSubset == 0) &&
7724 (ctxt->hasPErefs == 0))) {
7725 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7726 "Entity '%s' not defined\n", name);
7727 } else {
7728 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7729 "Entity '%s' not defined\n",
7730 name);
7731 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007732 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007733 /* TODO ? check regressions ctxt->valid = 0; */
7734 }
7735
7736 /*
7737 * [ WFC: Parsed Entity ]
7738 * An entity reference must not contain the name of an
7739 * unparsed entity
7740 */
7741 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7742 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7743 "Entity reference to unparsed entity %s\n", name);
7744 }
7745
7746 /*
7747 * [ WFC: No External Entity References ]
7748 * Attribute values cannot contain direct or indirect
7749 * entity references to external entities.
7750 */
7751 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7752 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7753 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7754 "Attribute references external entity '%s'\n", name);
7755 }
7756 /*
7757 * [ WFC: No < in Attribute Values ]
7758 * The replacement text of any entity referred to directly or
7759 * indirectly in an attribute value (other than "&lt;") must
7760 * not contain a <.
7761 */
7762 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7763 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007764 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007765 (xmlStrchr(ent->content, '<'))) {
7766 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7767 "'<' in entity '%s' is not allowed in attributes values\n",
7768 name);
7769 }
7770
7771 /*
7772 * Internal check, no parameter entities here ...
7773 */
7774 else {
7775 switch (ent->etype) {
7776 case XML_INTERNAL_PARAMETER_ENTITY:
7777 case XML_EXTERNAL_PARAMETER_ENTITY:
7778 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7779 "Attempt to reference the parameter entity '%s'\n",
7780 name);
7781 break;
7782 default:
7783 break;
7784 }
7785 }
7786
7787 /*
7788 * [ WFC: No Recursion ]
7789 * A parsed entity must not contain a recursive reference
7790 * to itself, either directly or indirectly.
7791 * Done somewhere else
7792 */
7793
7794 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007795 *str = ptr;
7796 return(ent);
7797}
7798
7799/**
7800 * xmlParsePEReference:
7801 * @ctxt: an XML parser context
7802 *
7803 * parse PEReference declarations
7804 * The entity content is handled directly by pushing it's content as
7805 * a new input stream.
7806 *
7807 * [69] PEReference ::= '%' Name ';'
7808 *
7809 * [ WFC: No Recursion ]
7810 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007811 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007812 *
7813 * [ WFC: Entity Declared ]
7814 * In a document without any DTD, a document with only an internal DTD
7815 * subset which contains no parameter entity references, or a document
7816 * with "standalone='yes'", ... ... The declaration of a parameter
7817 * entity must precede any reference to it...
7818 *
7819 * [ VC: Entity Declared ]
7820 * In a document with an external subset or external parameter entities
7821 * with "standalone='no'", ... ... The declaration of a parameter entity
7822 * must precede any reference to it...
7823 *
7824 * [ WFC: In DTD ]
7825 * Parameter-entity references may only appear in the DTD.
7826 * NOTE: misleading but this is handled.
7827 */
7828void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007829xmlParsePEReference(xmlParserCtxtPtr ctxt)
7830{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007831 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007832 xmlEntityPtr entity = NULL;
7833 xmlParserInputPtr input;
7834
Daniel Veillard0161e632008-08-28 15:36:32 +00007835 if (RAW != '%')
7836 return;
7837 NEXT;
7838 name = xmlParseName(ctxt);
7839 if (name == NULL) {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007840 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
Daniel Veillard0161e632008-08-28 15:36:32 +00007841 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007842 }
Nick Wellnhofer03904152017-06-05 21:16:00 +02007843 if (xmlParserDebugEntities)
7844 xmlGenericError(xmlGenericErrorContext,
7845 "PEReference: %s\n", name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007846 if (RAW != ';') {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007847 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007848 return;
7849 }
7850
7851 NEXT;
7852
7853 /*
7854 * Increate the number of entity references parsed
7855 */
7856 ctxt->nbentities++;
7857
7858 /*
7859 * Request the entity from SAX
7860 */
7861 if ((ctxt->sax != NULL) &&
7862 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007863 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7864 if (ctxt->instate == XML_PARSER_EOF)
7865 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007866 if (entity == NULL) {
7867 /*
7868 * [ WFC: Entity Declared ]
7869 * In a document without any DTD, a document with only an
7870 * internal DTD subset which contains no parameter entity
7871 * references, or a document with "standalone='yes'", ...
7872 * ... The declaration of a parameter entity must precede
7873 * any reference to it...
7874 */
7875 if ((ctxt->standalone == 1) ||
7876 ((ctxt->hasExternalSubset == 0) &&
7877 (ctxt->hasPErefs == 0))) {
7878 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7879 "PEReference: %%%s; not found\n",
7880 name);
7881 } else {
7882 /*
7883 * [ VC: Entity Declared ]
7884 * In a document with an external subset or external
7885 * parameter entities with "standalone='no'", ...
7886 * ... The declaration of a parameter entity must
7887 * precede any reference to it...
7888 */
Nick Wellnhofer03904152017-06-05 21:16:00 +02007889 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7890 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7891 "PEReference: %%%s; not found\n",
7892 name, NULL);
7893 } else
7894 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7895 "PEReference: %%%s; not found\n",
7896 name, NULL);
7897 ctxt->valid = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007898 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007899 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007900 } else {
7901 /*
7902 * Internal checking in case the entity quest barfed
7903 */
7904 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7905 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7906 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7907 "Internal: %%%s; is not a parameter entity\n",
7908 name, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007909 } else {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007910 xmlChar start[4];
7911 xmlCharEncoding enc;
7912
Neel Mehta90ccb582017-04-07 17:43:02 +02007913 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7914 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7915 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7916 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7917 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7918 (ctxt->replaceEntities == 0) &&
7919 (ctxt->validate == 0))
7920 return;
7921
Daniel Veillard0161e632008-08-28 15:36:32 +00007922 input = xmlNewEntityInputStream(ctxt, entity);
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02007923 if (xmlPushInput(ctxt, input) < 0) {
7924 xmlFreeInputStream(input);
Daniel Veillard0161e632008-08-28 15:36:32 +00007925 return;
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02007926 }
Nick Wellnhofer46dc9892017-06-08 02:24:56 +02007927
7928 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7929 /*
7930 * Get the 4 first bytes and decode the charset
7931 * if enc != XML_CHAR_ENCODING_NONE
7932 * plug some encoding conversion routines.
7933 * Note that, since we may have some non-UTF8
7934 * encoding (like UTF16, bug 135229), the 'length'
7935 * is not known, but we can calculate based upon
7936 * the amount of data in the buffer.
7937 */
7938 GROW
7939 if (ctxt->instate == XML_PARSER_EOF)
7940 return;
7941 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7942 start[0] = RAW;
7943 start[1] = NXT(1);
7944 start[2] = NXT(2);
7945 start[3] = NXT(3);
7946 enc = xmlDetectCharEncoding(start, 4);
7947 if (enc != XML_CHAR_ENCODING_NONE) {
7948 xmlSwitchEncoding(ctxt, enc);
7949 }
7950 }
7951
7952 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7953 (IS_BLANK_CH(NXT(5)))) {
7954 xmlParseTextDecl(ctxt);
Nick Wellnhofer03904152017-06-05 21:16:00 +02007955 }
7956 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007957 }
7958 }
7959 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007960}
7961
7962/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007963 * xmlLoadEntityContent:
7964 * @ctxt: an XML parser context
7965 * @entity: an unloaded system entity
7966 *
7967 * Load the original content of the given system entity from the
7968 * ExternalID/SystemID given. This is to be used for Included in Literal
7969 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7970 *
7971 * Returns 0 in case of success and -1 in case of failure
7972 */
7973static int
7974xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7975 xmlParserInputPtr input;
7976 xmlBufferPtr buf;
7977 int l, c;
7978 int count = 0;
7979
7980 if ((ctxt == NULL) || (entity == NULL) ||
7981 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7982 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7983 (entity->content != NULL)) {
7984 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7985 "xmlLoadEntityContent parameter error");
7986 return(-1);
7987 }
7988
7989 if (xmlParserDebugEntities)
7990 xmlGenericError(xmlGenericErrorContext,
7991 "Reading %s entity content input\n", entity->name);
7992
7993 buf = xmlBufferCreate();
7994 if (buf == NULL) {
7995 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7996 "xmlLoadEntityContent parameter error");
7997 return(-1);
7998 }
7999
8000 input = xmlNewEntityInputStream(ctxt, entity);
8001 if (input == NULL) {
8002 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8003 "xmlLoadEntityContent input error");
8004 xmlBufferFree(buf);
8005 return(-1);
8006 }
8007
8008 /*
8009 * Push the entity as the current input, read char by char
8010 * saving to the buffer until the end of the entity or an error
8011 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008012 if (xmlPushInput(ctxt, input) < 0) {
8013 xmlBufferFree(buf);
8014 return(-1);
8015 }
8016
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008017 GROW;
8018 c = CUR_CHAR(l);
8019 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8020 (IS_CHAR(c))) {
8021 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008022 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008023 count = 0;
8024 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008025 if (ctxt->instate == XML_PARSER_EOF) {
8026 xmlBufferFree(buf);
8027 return(-1);
8028 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008029 }
8030 NEXTL(l);
8031 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008032 if (c == 0) {
8033 count = 0;
8034 GROW;
8035 if (ctxt->instate == XML_PARSER_EOF) {
8036 xmlBufferFree(buf);
8037 return(-1);
8038 }
8039 c = CUR_CHAR(l);
8040 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008041 }
8042
8043 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8044 xmlPopInput(ctxt);
8045 } else if (!IS_CHAR(c)) {
8046 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8047 "xmlLoadEntityContent: invalid char value %d\n",
8048 c);
8049 xmlBufferFree(buf);
8050 return(-1);
8051 }
8052 entity->content = buf->content;
8053 buf->content = NULL;
8054 xmlBufferFree(buf);
8055
8056 return(0);
8057}
8058
8059/**
Owen Taylor3473f882001-02-23 17:55:21 +00008060 * xmlParseStringPEReference:
8061 * @ctxt: an XML parser context
8062 * @str: a pointer to an index in the string
8063 *
8064 * parse PEReference declarations
8065 *
8066 * [69] PEReference ::= '%' Name ';'
8067 *
8068 * [ WFC: No Recursion ]
8069 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008070 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008071 *
8072 * [ WFC: Entity Declared ]
8073 * In a document without any DTD, a document with only an internal DTD
8074 * subset which contains no parameter entity references, or a document
8075 * with "standalone='yes'", ... ... The declaration of a parameter
8076 * entity must precede any reference to it...
8077 *
8078 * [ VC: Entity Declared ]
8079 * In a document with an external subset or external parameter entities
8080 * with "standalone='no'", ... ... The declaration of a parameter entity
8081 * must precede any reference to it...
8082 *
8083 * [ WFC: In DTD ]
8084 * Parameter-entity references may only appear in the DTD.
8085 * NOTE: misleading but this is handled.
8086 *
 * Returns the parameter entity (xmlEntityPtr) if found, or NULL otherwise.
8088 * str is updated to the current value of the index
8089 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008090static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008091xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8092 const xmlChar *ptr;
8093 xmlChar cur;
8094 xmlChar *name;
8095 xmlEntityPtr entity = NULL;
8096
8097 if ((str == NULL) || (*str == NULL)) return(NULL);
8098 ptr = *str;
8099 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008100 if (cur != '%')
8101 return(NULL);
8102 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008103 name = xmlParseStringName(ctxt, &ptr);
8104 if (name == NULL) {
8105 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8106 "xmlParseStringPEReference: no name\n");
8107 *str = ptr;
8108 return(NULL);
8109 }
8110 cur = *ptr;
8111 if (cur != ';') {
8112 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8113 xmlFree(name);
8114 *str = ptr;
8115 return(NULL);
8116 }
8117 ptr++;
8118
8119 /*
8120 * Increate the number of entity references parsed
8121 */
8122 ctxt->nbentities++;
8123
8124 /*
8125 * Request the entity from SAX
8126 */
8127 if ((ctxt->sax != NULL) &&
8128 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008129 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8130 if (ctxt->instate == XML_PARSER_EOF) {
8131 xmlFree(name);
Nick Wellnhoferfb2f5182017-06-10 17:06:16 +02008132 *str = ptr;
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008133 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008134 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008135 if (entity == NULL) {
8136 /*
8137 * [ WFC: Entity Declared ]
8138 * In a document without any DTD, a document with only an
8139 * internal DTD subset which contains no parameter entity
8140 * references, or a document with "standalone='yes'", ...
8141 * ... The declaration of a parameter entity must precede
8142 * any reference to it...
8143 */
8144 if ((ctxt->standalone == 1) ||
8145 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8146 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8147 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008148 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008149 /*
8150 * [ VC: Entity Declared ]
8151 * In a document with an external subset or external
8152 * parameter entities with "standalone='no'", ...
8153 * ... The declaration of a parameter entity must
8154 * precede any reference to it...
8155 */
8156 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8157 "PEReference: %%%s; not found\n",
8158 name, NULL);
8159 ctxt->valid = 0;
8160 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008161 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008162 } else {
8163 /*
8164 * Internal checking in case the entity quest barfed
8165 */
8166 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8167 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8168 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8169 "%%%s; is not a parameter entity\n",
8170 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008171 }
8172 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008173 ctxt->hasPErefs = 1;
8174 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008175 *str = ptr;
8176 return(entity);
8177}
8178
8179/**
8180 * xmlParseDocTypeDecl:
8181 * @ctxt: an XML parser context
8182 *
8183 * parse a DOCTYPE declaration
8184 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008185 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008186 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8187 *
8188 * [ VC: Root Element Type ]
8189 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008190 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008191 */
8192
8193void
8194xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008195 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008196 xmlChar *ExternalID = NULL;
8197 xmlChar *URI = NULL;
8198
8199 /*
8200 * We know that '<!DOCTYPE' has been detected.
8201 */
8202 SKIP(9);
8203
8204 SKIP_BLANKS;
8205
8206 /*
8207 * Parse the DOCTYPE name.
8208 */
8209 name = xmlParseName(ctxt);
8210 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008211 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8212 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008213 }
8214 ctxt->intSubName = name;
8215
8216 SKIP_BLANKS;
8217
8218 /*
8219 * Check for SystemID and ExternalID
8220 */
8221 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8222
8223 if ((URI != NULL) || (ExternalID != NULL)) {
8224 ctxt->hasExternalSubset = 1;
8225 }
8226 ctxt->extSubURI = URI;
8227 ctxt->extSubSystem = ExternalID;
8228
8229 SKIP_BLANKS;
8230
8231 /*
8232 * Create and update the internal subset.
8233 */
8234 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8235 (!ctxt->disableSAX))
8236 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008237 if (ctxt->instate == XML_PARSER_EOF)
8238 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008239
8240 /*
8241 * Is there any internal subset declarations ?
8242 * they are handled separately in xmlParseInternalSubset()
8243 */
8244 if (RAW == '[')
8245 return;
8246
8247 /*
8248 * We should be at the end of the DOCTYPE declaration.
8249 */
8250 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008251 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008252 }
8253 NEXT;
8254}
8255
8256/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008257 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008258 * @ctxt: an XML parser context
8259 *
8260 * parse the internal subset declaration
8261 *
8262 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8263 */
8264
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008265static void
Owen Taylor3473f882001-02-23 17:55:21 +00008266xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8267 /*
8268 * Is there any DTD definition ?
8269 */
8270 if (RAW == '[') {
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008271 int baseInputNr = ctxt->inputNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008272 ctxt->instate = XML_PARSER_DTD;
8273 NEXT;
8274 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008275 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008276 * PEReferences.
8277 * Subsequence (markupdecl | PEReference | S)*
8278 */
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008279 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
Nick Wellnhofer453dff12017-06-19 17:55:20 +02008280 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008281 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008282 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008283
8284 SKIP_BLANKS;
8285 xmlParseMarkupDecl(ctxt);
8286 xmlParsePEReference(ctxt);
8287
Owen Taylor3473f882001-02-23 17:55:21 +00008288 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008289 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008290 "xmlParseInternalSubset: error detected in Markup declaration\n");
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008291 if (ctxt->inputNr > baseInputNr)
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02008292 xmlPopInput(ctxt);
8293 else
8294 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008295 }
8296 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008297 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008298 NEXT;
8299 SKIP_BLANKS;
8300 }
8301 }
8302
8303 /*
8304 * We should be at the end of the DOCTYPE declaration.
8305 */
8306 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008307 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Daniel Veillarda7a94612016-02-09 12:55:29 +01008308 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008309 }
8310 NEXT;
8311}
8312
Daniel Veillard81273902003-09-30 00:43:48 +00008313#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008314/**
8315 * xmlParseAttribute:
8316 * @ctxt: an XML parser context
8317 * @value: a xmlChar ** used to store the value of the attribute
8318 *
8319 * parse an attribute
8320 *
8321 * [41] Attribute ::= Name Eq AttValue
8322 *
8323 * [ WFC: No External Entity References ]
8324 * Attribute values cannot contain direct or indirect entity references
8325 * to external entities.
8326 *
8327 * [ WFC: No < in Attribute Values ]
8328 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008329 * an attribute value (other than "&lt;") must not contain a <.
8330 *
Owen Taylor3473f882001-02-23 17:55:21 +00008331 * [ VC: Attribute Value Type ]
8332 * The attribute must have been declared; the value must be of the type
8333 * declared for it.
8334 *
8335 * [25] Eq ::= S? '=' S?
8336 *
8337 * With namespace:
8338 *
8339 * [NS 11] Attribute ::= QName Eq AttValue
8340 *
8341 * Also the case QName == xmlns:??? is handled independently as a namespace
8342 * definition.
8343 *
8344 * Returns the attribute name, and the value in *value.
8345 */
8346
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008347const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008348xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008349 const xmlChar *name;
8350 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008351
8352 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008353 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008354 name = xmlParseName(ctxt);
8355 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008356 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008357 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008358 return(NULL);
8359 }
8360
8361 /*
8362 * read the value
8363 */
8364 SKIP_BLANKS;
8365 if (RAW == '=') {
8366 NEXT;
8367 SKIP_BLANKS;
8368 val = xmlParseAttValue(ctxt);
8369 ctxt->instate = XML_PARSER_CONTENT;
8370 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008371 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02008372 "Specification mandates value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008373 return(NULL);
8374 }
8375
8376 /*
8377 * Check that xml:lang conforms to the specification
8378 * No more registered as an error, just generate a warning now
8379 * since this was deprecated in XML second edition
8380 */
8381 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8382 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008383 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8384 "Malformed value for xml:lang : %s\n",
8385 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008386 }
8387 }
8388
8389 /*
8390 * Check that xml:space conforms to the specification
8391 */
8392 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8393 if (xmlStrEqual(val, BAD_CAST "default"))
8394 *(ctxt->space) = 0;
8395 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8396 *(ctxt->space) = 1;
8397 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008398 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008399"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008400 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008401 }
8402 }
8403
8404 *value = val;
8405 return(name);
8406}
8407
8408/**
8409 * xmlParseStartTag:
8410 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008411 *
Owen Taylor3473f882001-02-23 17:55:21 +00008412 * parse a start of tag either for rule element or
8413 * EmptyElement. In both case we don't parse the tag closing chars.
8414 *
8415 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8416 *
8417 * [ WFC: Unique Att Spec ]
8418 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008419 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008420 *
8421 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8422 *
8423 * [ WFC: Unique Att Spec ]
8424 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008425 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008426 *
8427 * With namespace:
8428 *
8429 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8430 *
8431 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8432 *
8433 * Returns the element name parsed
8434 */
8435
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008436const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008437xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008438 const xmlChar *name;
8439 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008440 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008441 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008442 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008443 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008444 int i;
8445
8446 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008447 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008448
8449 name = xmlParseName(ctxt);
8450 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008451 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008452 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008453 return(NULL);
8454 }
8455
8456 /*
8457 * Now parse the attributes, it ends up with the ending
8458 *
8459 * (S Attribute)* S?
8460 */
8461 SKIP_BLANKS;
8462 GROW;
8463
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008464 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008465 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008466 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008467 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008468 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008469
8470 attname = xmlParseAttribute(ctxt, &attvalue);
8471 if ((attname != NULL) && (attvalue != NULL)) {
8472 /*
8473 * [ WFC: Unique Att Spec ]
8474 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008475 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008476 */
8477 for (i = 0; i < nbatts;i += 2) {
8478 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008479 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008480 xmlFree(attvalue);
8481 goto failed;
8482 }
8483 }
Owen Taylor3473f882001-02-23 17:55:21 +00008484 /*
8485 * Add the pair to atts
8486 */
8487 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008488 maxatts = 22; /* allow for 10 attrs by default */
8489 atts = (const xmlChar **)
8490 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008491 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008492 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008493 if (attvalue != NULL)
8494 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008495 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008496 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008497 ctxt->atts = atts;
8498 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008499 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008500 const xmlChar **n;
8501
Owen Taylor3473f882001-02-23 17:55:21 +00008502 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008503 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008504 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008505 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008506 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008507 if (attvalue != NULL)
8508 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008509 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008510 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008511 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008512 ctxt->atts = atts;
8513 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008514 }
8515 atts[nbatts++] = attname;
8516 atts[nbatts++] = attvalue;
8517 atts[nbatts] = NULL;
8518 atts[nbatts + 1] = NULL;
8519 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008520 if (attvalue != NULL)
8521 xmlFree(attvalue);
8522 }
8523
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008524failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008525
Daniel Veillard3772de32002-12-17 10:31:45 +00008526 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008527 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8528 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02008529 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8531 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008532 }
Daniel Veillard02111c12003-02-24 19:14:52 +00008533 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8534 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008535 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8536 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008537 break;
8538 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008539 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008540 GROW;
8541 }
8542
8543 /*
8544 * SAX: Start of Element !
8545 */
8546 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008547 (!ctxt->disableSAX)) {
8548 if (nbatts > 0)
8549 ctxt->sax->startElement(ctxt->userData, name, atts);
8550 else
8551 ctxt->sax->startElement(ctxt->userData, name, NULL);
8552 }
Owen Taylor3473f882001-02-23 17:55:21 +00008553
8554 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008555 /* Free only the content strings */
8556 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008557 if (atts[i] != NULL)
8558 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008559 }
8560 return(name);
8561}
8562
8563/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008564 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008565 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008566 * @line: line of the start tag
8567 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008568 *
8569 * parse an end of tag
8570 *
8571 * [42] ETag ::= '</' Name S? '>'
8572 *
8573 * With namespace
8574 *
8575 * [NS 9] ETag ::= '</' QName S? '>'
8576 */
8577
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008578static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008579xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008580 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008581
8582 GROW;
8583 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008584 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008585 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008586 return;
8587 }
8588 SKIP(2);
8589
Daniel Veillard46de64e2002-05-29 08:21:33 +00008590 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008591
8592 /*
8593 * We should definitely be at the ending "S? '>'" part
8594 */
8595 GROW;
8596 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008597 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008598 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008599 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008600 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008601
8602 /*
8603 * [ WFC: Element Type Match ]
8604 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008605 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008606 *
8607 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008608 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008609 if (name == NULL) name = BAD_CAST "unparseable";
8610 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008611 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008612 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008613 }
8614
8615 /*
8616 * SAX: End of Tag
8617 */
8618 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8619 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008620 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008621
Daniel Veillarde57ec792003-09-10 10:50:59 +00008622 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008623 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008624 return;
8625}
8626
8627/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008628 * xmlParseEndTag:
8629 * @ctxt: an XML parser context
8630 *
8631 * parse an end of tag
8632 *
8633 * [42] ETag ::= '</' Name S? '>'
8634 *
8635 * With namespace
8636 *
8637 * [NS 9] ETag ::= '</' QName S? '>'
8638 */
8639
8640void
8641xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008642 xmlParseEndTag1(ctxt, 0);
8643}
Daniel Veillard81273902003-09-30 00:43:48 +00008644#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008645
8646/************************************************************************
8647 * *
8648 * SAX 2 specific operations *
8649 * *
8650 ************************************************************************/
8651
Daniel Veillard0fb18932003-09-07 09:14:37 +00008652/*
8653 * xmlGetNamespace:
8654 * @ctxt: an XML parser context
8655 * @prefix: the prefix to lookup
8656 *
8657 * Lookup the namespace name for the @prefix (which ca be NULL)
Jan Pokornýbb654fe2016-04-13 16:56:07 +02008658 * The prefix must come from the @ctxt->dict dictionary
Daniel Veillard0fb18932003-09-07 09:14:37 +00008659 *
8660 * Returns the namespace name or NULL if not bound
8661 */
8662static const xmlChar *
8663xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8664 int i;
8665
Daniel Veillarde57ec792003-09-10 10:50:59 +00008666 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008667 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008668 if (ctxt->nsTab[i] == prefix) {
8669 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8670 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008671 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008672 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008673 return(NULL);
8674}
8675
8676/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008677 * xmlParseQName:
8678 * @ctxt: an XML parser context
8679 * @prefix: pointer to store the prefix part
8680 *
8681 * parse an XML Namespace QName
8682 *
8683 * [6] QName ::= (Prefix ':')? LocalPart
8684 * [7] Prefix ::= NCName
8685 * [8] LocalPart ::= NCName
8686 *
8687 * Returns the Name parsed or NULL
8688 */
8689
8690static const xmlChar *
8691xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8692 const xmlChar *l, *p;
8693
8694 GROW;
8695
8696 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008697 if (l == NULL) {
8698 if (CUR == ':') {
8699 l = xmlParseName(ctxt);
8700 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008701 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008702 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008703 *prefix = NULL;
8704 return(l);
8705 }
8706 }
8707 return(NULL);
8708 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008709 if (CUR == ':') {
8710 NEXT;
8711 p = l;
8712 l = xmlParseNCName(ctxt);
8713 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008714 xmlChar *tmp;
8715
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008716 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8717 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008718 l = xmlParseNmtoken(ctxt);
8719 if (l == NULL)
8720 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8721 else {
8722 tmp = xmlBuildQName(l, p, NULL, 0);
8723 xmlFree((char *)l);
8724 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008725 p = xmlDictLookup(ctxt->dict, tmp, -1);
8726 if (tmp != NULL) xmlFree(tmp);
8727 *prefix = NULL;
8728 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008729 }
8730 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008731 xmlChar *tmp;
8732
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008733 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8734 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008735 NEXT;
8736 tmp = (xmlChar *) xmlParseName(ctxt);
8737 if (tmp != NULL) {
8738 tmp = xmlBuildQName(tmp, l, NULL, 0);
8739 l = xmlDictLookup(ctxt->dict, tmp, -1);
8740 if (tmp != NULL) xmlFree(tmp);
8741 *prefix = p;
8742 return(l);
8743 }
8744 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8745 l = xmlDictLookup(ctxt->dict, tmp, -1);
8746 if (tmp != NULL) xmlFree(tmp);
8747 *prefix = p;
8748 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008749 }
8750 *prefix = p;
8751 } else
8752 *prefix = NULL;
8753 return(l);
8754}
8755
8756/**
8757 * xmlParseQNameAndCompare:
8758 * @ctxt: an XML parser context
8759 * @name: the localname
8760 * @prefix: the prefix, if any.
8761 *
8762 * parse an XML name and compares for match
8763 * (specialized for endtag parsing)
8764 *
8765 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8766 * and the name for mismatch
8767 */
8768
8769static const xmlChar *
8770xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8771 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008772 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008773 const xmlChar *in;
8774 const xmlChar *ret;
8775 const xmlChar *prefix2;
8776
8777 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8778
8779 GROW;
8780 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008781
Daniel Veillard0fb18932003-09-07 09:14:37 +00008782 cmp = prefix;
8783 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008784 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008785 ++cmp;
8786 }
8787 if ((*cmp == 0) && (*in == ':')) {
8788 in++;
8789 cmp = name;
8790 while (*in != 0 && *in == *cmp) {
8791 ++in;
8792 ++cmp;
8793 }
William M. Brack76e95df2003-10-18 16:20:14 +00008794 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008795 /* success */
8796 ctxt->input->cur = in;
8797 return((const xmlChar*) 1);
8798 }
8799 }
8800 /*
8801 * all strings coms from the dictionary, equality can be done directly
8802 */
8803 ret = xmlParseQName (ctxt, &prefix2);
8804 if ((ret == name) && (prefix == prefix2))
8805 return((const xmlChar*) 1);
8806 return ret;
8807}
8808
8809/**
8810 * xmlParseAttValueInternal:
8811 * @ctxt: an XML parser context
8812 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008813 * @alloc: whether the attribute was reallocated as a new string
8814 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008815 *
8816 * parse a value for an attribute.
8817 * NOTE: if no normalization is needed, the routine will return pointers
8818 * directly from the data buffer.
8819 *
8820 * 3.3.3 Attribute-Value Normalization:
8821 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008822 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008823 * - a character reference is processed by appending the referenced
8824 * character to the attribute value
8825 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008826 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008827 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8828 * appending #x20 to the normalized value, except that only a single
8829 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008830 * parsed entity or the literal entity value of an internal parsed entity
8831 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008832 * If the declared value is not CDATA, then the XML processor must further
8833 * process the normalized attribute value by discarding any leading and
8834 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008835 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008836 * All attributes for which no declaration has been read should be treated
8837 * by a non-validating parser as if declared CDATA.
8838 *
8839 * Returns the AttValue parsed or NULL. The value has to be freed by the
8840 * caller if it was copied, this can be detected by val[*len] == 0.
8841 */
8842
8843static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008844xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8845 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008846{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008847 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008848 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008849 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08008850 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008851
8852 GROW;
8853 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08008854 line = ctxt->input->line;
8855 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008856 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008857 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008858 return (NULL);
8859 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008860 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008861
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008862 /*
8863 * try to handle in this routine the most common case where no
8864 * allocation of a new string is required and where content is
8865 * pure ASCII.
8866 */
8867 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008868 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008869 end = ctxt->input->end;
8870 start = in;
8871 if (in >= end) {
8872 const xmlChar *oldbase = ctxt->input->base;
8873 GROW;
8874 if (oldbase != ctxt->input->base) {
8875 long delta = ctxt->input->base - oldbase;
8876 start = start + delta;
8877 in = in + delta;
8878 }
8879 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008880 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008881 if (normalize) {
8882 /*
8883 * Skip any leading spaces
8884 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008885 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008886 ((*in == 0x20) || (*in == 0x9) ||
8887 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008888 if (*in == 0xA) {
8889 line++; col = 1;
8890 } else {
8891 col++;
8892 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008893 in++;
8894 start = in;
8895 if (in >= end) {
8896 const xmlChar *oldbase = ctxt->input->base;
8897 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008898 if (ctxt->instate == XML_PARSER_EOF)
8899 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008900 if (oldbase != ctxt->input->base) {
8901 long delta = ctxt->input->base - oldbase;
8902 start = start + delta;
8903 in = in + delta;
8904 }
8905 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008906 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8907 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8908 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008909 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008910 return(NULL);
8911 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008912 }
8913 }
8914 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8915 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008916 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008917 if ((*in++ == 0x20) && (*in == 0x20)) break;
8918 if (in >= end) {
8919 const xmlChar *oldbase = ctxt->input->base;
8920 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008921 if (ctxt->instate == XML_PARSER_EOF)
8922 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008923 if (oldbase != ctxt->input->base) {
8924 long delta = ctxt->input->base - oldbase;
8925 start = start + delta;
8926 in = in + delta;
8927 }
8928 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008929 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8930 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8931 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008932 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008933 return(NULL);
8934 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008935 }
8936 }
8937 last = in;
8938 /*
8939 * skip the trailing blanks
8940 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008941 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008942 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008943 ((*in == 0x20) || (*in == 0x9) ||
8944 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008945 if (*in == 0xA) {
8946 line++, col = 1;
8947 } else {
8948 col++;
8949 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008950 in++;
8951 if (in >= end) {
8952 const xmlChar *oldbase = ctxt->input->base;
8953 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008954 if (ctxt->instate == XML_PARSER_EOF)
8955 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008956 if (oldbase != ctxt->input->base) {
8957 long delta = ctxt->input->base - oldbase;
8958 start = start + delta;
8959 in = in + delta;
8960 last = last + delta;
8961 }
8962 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008963 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8964 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8965 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008966 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008967 return(NULL);
8968 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008969 }
8970 }
Daniel Veillarde17db992012-07-19 11:25:16 +08008971 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8972 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8973 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008974 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008975 return(NULL);
8976 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008977 if (*in != limit) goto need_complex;
8978 } else {
8979 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8980 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8981 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008982 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008983 if (in >= end) {
8984 const xmlChar *oldbase = ctxt->input->base;
8985 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008986 if (ctxt->instate == XML_PARSER_EOF)
8987 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008988 if (oldbase != ctxt->input->base) {
8989 long delta = ctxt->input->base - oldbase;
8990 start = start + delta;
8991 in = in + delta;
8992 }
8993 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008994 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8995 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8996 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008997 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008998 return(NULL);
8999 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009000 }
9001 }
9002 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009003 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9004 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9005 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009006 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009007 return(NULL);
9008 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009009 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009010 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009011 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009012 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009013 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009014 *len = last - start;
9015 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009016 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009017 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009018 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009019 }
9020 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009021 ctxt->input->line = line;
9022 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009023 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009024 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009025need_complex:
9026 if (alloc) *alloc = 1;
9027 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009028}
9029
9030/**
9031 * xmlParseAttribute2:
9032 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009033 * @pref: the element prefix
9034 * @elem: the element name
9035 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009036 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009037 * @len: an int * to save the length of the attribute
9038 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009039 *
9040 * parse an attribute in the new SAX2 framework.
9041 *
9042 * Returns the attribute name, and the value in *value, .
9043 */
9044
9045static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009046xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009047 const xmlChar * pref, const xmlChar * elem,
9048 const xmlChar ** prefix, xmlChar ** value,
9049 int *len, int *alloc)
9050{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009051 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009052 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009053 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009054
9055 *value = NULL;
9056 GROW;
9057 name = xmlParseQName(ctxt, prefix);
9058 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009059 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9060 "error parsing attribute name\n");
9061 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009062 }
9063
9064 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009065 * get the type if needed
9066 */
9067 if (ctxt->attsSpecial != NULL) {
9068 int type;
9069
Nick Wellnhoferd422b952017-10-09 13:37:42 +02009070 type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9071 pref, elem, *prefix, name);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009072 if (type != 0)
9073 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009074 }
9075
9076 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009077 * read the value
9078 */
9079 SKIP_BLANKS;
9080 if (RAW == '=') {
9081 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009082 SKIP_BLANKS;
9083 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9084 if (normalize) {
9085 /*
9086 * Sometimes a second normalisation pass for spaces is needed
9087 * but that only happens if charrefs or entities refernces
9088 * have been used in the attribute value, i.e. the attribute
9089 * value have been extracted in an allocated string already.
9090 */
9091 if (*alloc) {
9092 const xmlChar *val2;
9093
9094 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009095 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009096 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009097 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009098 }
9099 }
9100 }
9101 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009102 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009103 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02009104 "Specification mandates value for attribute %s\n",
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009105 name);
9106 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009107 }
9108
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009109 if (*prefix == ctxt->str_xml) {
9110 /*
9111 * Check that xml:lang conforms to the specification
9112 * No more registered as an error, just generate a warning now
9113 * since this was deprecated in XML second edition
9114 */
9115 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9116 internal_val = xmlStrndup(val, *len);
9117 if (!xmlCheckLanguageID(internal_val)) {
9118 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9119 "Malformed value for xml:lang : %s\n",
9120 internal_val, NULL);
9121 }
9122 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009123
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009124 /*
9125 * Check that xml:space conforms to the specification
9126 */
9127 if (xmlStrEqual(name, BAD_CAST "space")) {
9128 internal_val = xmlStrndup(val, *len);
9129 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9130 *(ctxt->space) = 0;
9131 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9132 *(ctxt->space) = 1;
9133 else {
9134 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9135 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9136 internal_val, NULL);
9137 }
9138 }
9139 if (internal_val) {
9140 xmlFree(internal_val);
9141 }
9142 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009143
9144 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009145 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009146}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009147/**
9148 * xmlParseStartTag2:
9149 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009150 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009151 * parse a start of tag either for rule element or
9152 * EmptyElement. In both case we don't parse the tag closing chars.
9153 * This routine is called when running SAX2 parsing
9154 *
9155 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9156 *
9157 * [ WFC: Unique Att Spec ]
9158 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009159 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009160 *
9161 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9162 *
9163 * [ WFC: Unique Att Spec ]
9164 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009165 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009166 *
9167 * With namespace:
9168 *
9169 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9170 *
9171 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9172 *
9173 * Returns the element name parsed
9174 */
9175
9176static const xmlChar *
9177xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009178 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009179 const xmlChar *localname;
9180 const xmlChar *prefix;
9181 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009182 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009183 const xmlChar *nsname;
9184 xmlChar *attvalue;
9185 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009186 int maxatts = ctxt->maxatts;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009187 int nratts, nbatts, nbdef, inputid;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009188 int i, j, nbNs, attval;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009189 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009190 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009191
9192 if (RAW != '<') return(NULL);
9193 NEXT1;
9194
9195 /*
9196 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9197 * point since the attribute values may be stored as pointers to
9198 * the buffer and calling SHRINK would destroy them !
9199 * The Shrinking is only possible once the full set of attribute
9200 * callbacks have been done.
9201 */
9202 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009203 cur = ctxt->input->cur - ctxt->input->base;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009204 inputid = ctxt->input->id;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009205 nbatts = 0;
9206 nratts = 0;
9207 nbdef = 0;
9208 nbNs = 0;
9209 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009210 /* Forget any namespaces added during an earlier parse of this element. */
9211 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009212
9213 localname = xmlParseQName(ctxt, &prefix);
9214 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009215 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9216 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009217 return(NULL);
9218 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009219 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009220
9221 /*
9222 * Now parse the attributes, it ends up with the ending
9223 *
9224 * (S Attribute)* S?
9225 */
9226 SKIP_BLANKS;
9227 GROW;
9228
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009229 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009230 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009231 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009232 const xmlChar *q = CUR_PTR;
9233 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009234 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009235
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009236 attname = xmlParseAttribute2(ctxt, prefix, localname,
9237 &aprefix, &attvalue, &len, &alloc);
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009238 if ((attname == NULL) || (attvalue == NULL))
9239 goto next_attr;
9240 if (len < 0) len = xmlStrlen(attvalue);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009241
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009242 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9243 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9244 xmlURIPtr uri;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009245
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009246 if (URL == NULL) {
9247 xmlErrMemory(ctxt, "dictionary allocation failure");
9248 if ((attvalue != NULL) && (alloc != 0))
9249 xmlFree(attvalue);
9250 return(NULL);
9251 }
9252 if (*URL != 0) {
9253 uri = xmlParseURI((const char *) URL);
9254 if (uri == NULL) {
9255 xmlNsErr(ctxt, XML_WAR_NS_URI,
9256 "xmlns: '%s' is not a valid URI\n",
9257 URL, NULL, NULL);
9258 } else {
9259 if (uri->scheme == NULL) {
9260 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9261 "xmlns: URI %s is not absolute\n",
9262 URL, NULL, NULL);
9263 }
9264 xmlFreeURI(uri);
9265 }
Daniel Veillard37334572008-07-31 08:20:02 +00009266 if (URL == ctxt->str_xml_ns) {
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009267 if (attname != ctxt->str_xml) {
9268 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9269 "xml namespace URI cannot be the default namespace\n",
9270 NULL, NULL, NULL);
9271 }
9272 goto next_attr;
9273 }
9274 if ((len == 29) &&
9275 (xmlStrEqual(URL,
9276 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9277 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9278 "reuse of the xmlns namespace name is forbidden\n",
9279 NULL, NULL, NULL);
9280 goto next_attr;
9281 }
9282 }
9283 /*
9284 * check that it's not a defined namespace
9285 */
9286 for (j = 1;j <= nbNs;j++)
9287 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9288 break;
9289 if (j <= nbNs)
9290 xmlErrAttributeDup(ctxt, NULL, attname);
9291 else
9292 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009293
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009294 } else if (aprefix == ctxt->str_xmlns) {
9295 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9296 xmlURIPtr uri;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009297
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009298 if (attname == ctxt->str_xml) {
9299 if (URL != ctxt->str_xml_ns) {
9300 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9301 "xml namespace prefix mapped to wrong URI\n",
9302 NULL, NULL, NULL);
9303 }
9304 /*
9305 * Do not keep a namespace definition node
9306 */
9307 goto next_attr;
9308 }
9309 if (URL == ctxt->str_xml_ns) {
9310 if (attname != ctxt->str_xml) {
9311 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9312 "xml namespace URI mapped to wrong prefix\n",
9313 NULL, NULL, NULL);
9314 }
9315 goto next_attr;
9316 }
9317 if (attname == ctxt->str_xmlns) {
9318 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9319 "redefinition of the xmlns prefix is forbidden\n",
9320 NULL, NULL, NULL);
9321 goto next_attr;
9322 }
9323 if ((len == 29) &&
9324 (xmlStrEqual(URL,
9325 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9326 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9327 "reuse of the xmlns namespace name is forbidden\n",
9328 NULL, NULL, NULL);
9329 goto next_attr;
9330 }
9331 if ((URL == NULL) || (URL[0] == 0)) {
9332 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9333 "xmlns:%s: Empty XML namespace is not allowed\n",
9334 attname, NULL, NULL);
9335 goto next_attr;
9336 } else {
9337 uri = xmlParseURI((const char *) URL);
9338 if (uri == NULL) {
9339 xmlNsErr(ctxt, XML_WAR_NS_URI,
9340 "xmlns:%s: '%s' is not a valid URI\n",
9341 attname, URL, NULL);
9342 } else {
9343 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9344 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9345 "xmlns:%s: URI %s is not absolute\n",
9346 attname, URL, NULL);
9347 }
9348 xmlFreeURI(uri);
9349 }
9350 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009351
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009352 /*
9353 * check that it's not a defined namespace
9354 */
9355 for (j = 1;j <= nbNs;j++)
9356 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9357 break;
9358 if (j <= nbNs)
9359 xmlErrAttributeDup(ctxt, aprefix, attname);
9360 else
9361 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9362
9363 } else {
9364 /*
9365 * Add the pair to atts
9366 */
9367 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9368 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9369 goto next_attr;
9370 }
9371 maxatts = ctxt->maxatts;
9372 atts = ctxt->atts;
9373 }
9374 ctxt->attallocs[nratts++] = alloc;
9375 atts[nbatts++] = attname;
9376 atts[nbatts++] = aprefix;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009377 /*
9378 * The namespace URI field is used temporarily to point at the
9379 * base of the current input buffer for non-alloced attributes.
9380 * When the input buffer is reallocated, all the pointers become
9381 * invalid, but they can be reconstructed later.
9382 */
9383 if (alloc)
9384 atts[nbatts++] = NULL;
9385 else
9386 atts[nbatts++] = ctxt->input->base;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009387 atts[nbatts++] = attvalue;
9388 attvalue += len;
9389 atts[nbatts++] = attvalue;
9390 /*
9391 * tag if some deallocation is needed
9392 */
9393 if (alloc != 0) attval = 1;
9394 attvalue = NULL; /* moved into atts */
9395 }
9396
9397next_attr:
9398 if ((attvalue != NULL) && (alloc != 0)) {
9399 xmlFree(attvalue);
9400 attvalue = NULL;
9401 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009402
9403 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009404 if (ctxt->instate == XML_PARSER_EOF)
9405 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009406 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9407 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02009408 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009409 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9410 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009411 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009412 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009413 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9414 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009415 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009416 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009417 break;
9418 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009419 GROW;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009420 }
9421
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009422 if (ctxt->input->id != inputid) {
9423 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9424 "Unexpected change of input\n");
9425 localname = NULL;
9426 goto done;
9427 }
9428
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009429 /* Reconstruct attribute value pointers. */
9430 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9431 if (atts[i+2] != NULL) {
9432 /*
9433 * Arithmetic on dangling pointers is technically undefined
9434 * behavior, but well...
9435 */
9436 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9437 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9438 atts[i+3] += offset; /* value */
9439 atts[i+4] += offset; /* valuend */
9440 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009441 }
9442
Daniel Veillard0fb18932003-09-07 09:14:37 +00009443 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009444 * The attributes defaulting
9445 */
9446 if (ctxt->attsDefault != NULL) {
9447 xmlDefAttrsPtr defaults;
9448
9449 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9450 if (defaults != NULL) {
9451 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009452 attname = defaults->values[5 * i];
9453 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009454
9455 /*
9456 * special work for namespaces defaulted defs
9457 */
9458 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9459 /*
9460 * check that it's not a defined namespace
9461 */
9462 for (j = 1;j <= nbNs;j++)
9463 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9464 break;
9465 if (j <= nbNs) continue;
9466
9467 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009468 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009469 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009470 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009471 nbNs++;
9472 }
9473 } else if (aprefix == ctxt->str_xmlns) {
9474 /*
9475 * check that it's not a defined namespace
9476 */
9477 for (j = 1;j <= nbNs;j++)
9478 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9479 break;
9480 if (j <= nbNs) continue;
9481
9482 nsname = xmlGetNamespace(ctxt, attname);
9483 if (nsname != defaults->values[2]) {
9484 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009485 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009486 nbNs++;
9487 }
9488 } else {
9489 /*
9490 * check that it's not a defined attribute
9491 */
9492 for (j = 0;j < nbatts;j+=5) {
9493 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9494 break;
9495 }
9496 if (j < nbatts) continue;
9497
9498 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9499 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009500 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009501 }
9502 maxatts = ctxt->maxatts;
9503 atts = ctxt->atts;
9504 }
9505 atts[nbatts++] = attname;
9506 atts[nbatts++] = aprefix;
9507 if (aprefix == NULL)
9508 atts[nbatts++] = NULL;
9509 else
9510 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009511 atts[nbatts++] = defaults->values[5 * i + 2];
9512 atts[nbatts++] = defaults->values[5 * i + 3];
9513 if ((ctxt->standalone == 1) &&
9514 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009515 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009516 "standalone: attribute %s on %s defaulted from external subset\n",
9517 attname, localname);
9518 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009519 nbdef++;
9520 }
9521 }
9522 }
9523 }
9524
Daniel Veillarde70c8772003-11-25 07:21:18 +00009525 /*
9526 * The attributes checkings
9527 */
9528 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009529 /*
9530 * The default namespace does not apply to attribute names.
9531 */
9532 if (atts[i + 1] != NULL) {
9533 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9534 if (nsname == NULL) {
9535 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9536 "Namespace prefix %s for %s on %s is not defined\n",
9537 atts[i + 1], atts[i], localname);
9538 }
9539 atts[i + 2] = nsname;
9540 } else
9541 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009542 /*
9543 * [ WFC: Unique Att Spec ]
9544 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009545 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009546 * As extended by the Namespace in XML REC.
9547 */
9548 for (j = 0; j < i;j += 5) {
9549 if (atts[i] == atts[j]) {
9550 if (atts[i+1] == atts[j+1]) {
9551 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9552 break;
9553 }
9554 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9555 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9556 "Namespaced Attribute %s in '%s' redefined\n",
9557 atts[i], nsname, NULL);
9558 break;
9559 }
9560 }
9561 }
9562 }
9563
Daniel Veillarde57ec792003-09-10 10:50:59 +00009564 nsname = xmlGetNamespace(ctxt, prefix);
9565 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009566 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9567 "Namespace prefix %s on %s is not defined\n",
9568 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009569 }
9570 *pref = prefix;
9571 *URI = nsname;
9572
9573 /*
9574 * SAX: Start of Element !
9575 */
9576 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9577 (!ctxt->disableSAX)) {
9578 if (nbNs > 0)
9579 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9580 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9581 nbatts / 5, nbdef, atts);
9582 else
9583 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9584 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9585 }
9586
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009587done:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009588 /*
9589 * Free up attribute allocated strings if needed
9590 */
9591 if (attval != 0) {
9592 for (i = 3,j = 0; j < nratts;i += 5,j++)
9593 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9594 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009595 }
9596
9597 return(localname);
9598}
9599
9600/**
9601 * xmlParseEndTag2:
9602 * @ctxt: an XML parser context
9603 * @line: line of the start tag
9604 * @nsNr: number of namespaces on the start tag
9605 *
9606 * parse an end of tag
9607 *
9608 * [42] ETag ::= '</' Name S? '>'
9609 *
9610 * With namespace
9611 *
9612 * [NS 9] ETag ::= '</' QName S? '>'
9613 */
9614
9615static void
9616xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009617 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009618 const xmlChar *name;
David Kilzerdb07dd62016-02-12 09:58:29 -08009619 size_t curLength;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009620
9621 GROW;
9622 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009623 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009624 return;
9625 }
9626 SKIP(2);
9627
David Kilzerdb07dd62016-02-12 09:58:29 -08009628 curLength = ctxt->input->end - ctxt->input->cur;
9629 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9630 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9631 if ((curLength >= (size_t)(tlen + 1)) &&
9632 (ctxt->input->cur[tlen] == '>')) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009633 ctxt->input->cur += tlen + 1;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009634 ctxt->input->col += tlen + 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009635 goto done;
9636 }
9637 ctxt->input->cur += tlen;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009638 ctxt->input->col += tlen;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009639 name = (xmlChar*)1;
9640 } else {
9641 if (prefix == NULL)
9642 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9643 else
9644 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9645 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009646
9647 /*
9648 * We should definitely be at the ending "S? '>'" part
9649 */
9650 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009651 if (ctxt->instate == XML_PARSER_EOF)
9652 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009653 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009654 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009655 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009656 } else
9657 NEXT1;
9658
9659 /*
9660 * [ WFC: Element Type Match ]
9661 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009662 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009663 *
9664 */
9665 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009666 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009667 if ((line == 0) && (ctxt->node != NULL))
9668 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009669 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009670 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009671 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009672 }
9673
9674 /*
9675 * SAX: End of Tag
9676 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009677done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009678 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9679 (!ctxt->disableSAX))
9680 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9681
Daniel Veillard0fb18932003-09-07 09:14:37 +00009682 spacePop(ctxt);
9683 if (nsNr != 0)
9684 nsPop(ctxt, nsNr);
9685 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009686}
9687
9688/**
Owen Taylor3473f882001-02-23 17:55:21 +00009689 * xmlParseCDSect:
9690 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009691 *
Owen Taylor3473f882001-02-23 17:55:21 +00009692 * Parse escaped pure raw content.
9693 *
9694 * [18] CDSect ::= CDStart CData CDEnd
9695 *
9696 * [19] CDStart ::= '<![CDATA['
9697 *
9698 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9699 *
9700 * [21] CDEnd ::= ']]>'
9701 */
9702void
9703xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9704 xmlChar *buf = NULL;
9705 int len = 0;
9706 int size = XML_PARSER_BUFFER_SIZE;
9707 int r, rl;
9708 int s, sl;
9709 int cur, l;
9710 int count = 0;
9711
Daniel Veillard8f597c32003-10-06 08:19:27 +00009712 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009713 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009714 SKIP(9);
9715 } else
9716 return;
9717
9718 ctxt->instate = XML_PARSER_CDATA_SECTION;
9719 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009720 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009721 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009722 ctxt->instate = XML_PARSER_CONTENT;
9723 return;
9724 }
9725 NEXTL(rl);
9726 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009727 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009728 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009729 ctxt->instate = XML_PARSER_CONTENT;
9730 return;
9731 }
9732 NEXTL(sl);
9733 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009734 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009735 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009736 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009737 return;
9738 }
William M. Brack871611b2003-10-18 04:53:14 +00009739 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009740 ((r != ']') || (s != ']') || (cur != '>'))) {
9741 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009742 xmlChar *tmp;
9743
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009744 if ((size > XML_MAX_TEXT_LENGTH) &&
9745 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9746 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9747 "CData section too big found", NULL);
9748 xmlFree (buf);
9749 return;
9750 }
9751 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009752 if (tmp == NULL) {
9753 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009754 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009755 return;
9756 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009757 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009758 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009759 }
9760 COPY_BUF(rl,buf,len,r);
9761 r = s;
9762 rl = sl;
9763 s = cur;
9764 sl = l;
9765 count++;
9766 if (count > 50) {
9767 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009768 if (ctxt->instate == XML_PARSER_EOF) {
9769 xmlFree(buf);
9770 return;
9771 }
Owen Taylor3473f882001-02-23 17:55:21 +00009772 count = 0;
9773 }
9774 NEXTL(l);
9775 cur = CUR_CHAR(l);
9776 }
9777 buf[len] = 0;
9778 ctxt->instate = XML_PARSER_CONTENT;
9779 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009780 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009781 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009782 xmlFree(buf);
9783 return;
9784 }
9785 NEXTL(l);
9786
9787 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009788 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009789 */
9790 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9791 if (ctxt->sax->cdataBlock != NULL)
9792 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009793 else if (ctxt->sax->characters != NULL)
9794 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009795 }
9796 xmlFree(buf);
9797}
9798
9799/**
9800 * xmlParseContent:
9801 * @ctxt: an XML parser context
9802 *
9803 * Parse a content:
9804 *
9805 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9806 */
9807
9808void
9809xmlParseContent(xmlParserCtxtPtr ctxt) {
9810 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009811 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009812 ((RAW != '<') || (NXT(1) != '/')) &&
9813 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009814 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009815 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009816 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009817
9818 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009819 * First case : a Processing Instruction.
9820 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009821 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009822 xmlParsePI(ctxt);
9823 }
9824
9825 /*
9826 * Second case : a CDSection
9827 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009828 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009829 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009830 xmlParseCDSect(ctxt);
9831 }
9832
9833 /*
9834 * Third case : a comment
9835 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009836 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009837 (NXT(2) == '-') && (NXT(3) == '-')) {
9838 xmlParseComment(ctxt);
9839 ctxt->instate = XML_PARSER_CONTENT;
9840 }
9841
9842 /*
9843 * Fourth case : a sub-element.
9844 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009845 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009846 xmlParseElement(ctxt);
9847 }
9848
9849 /*
9850 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009851 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009852 */
9853
Daniel Veillard21a0f912001-02-25 19:54:14 +00009854 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009855 xmlParseReference(ctxt);
9856 }
9857
9858 /*
9859 * Last case, text. Note that References are handled directly.
9860 */
9861 else {
9862 xmlParseCharData(ctxt, 0);
9863 }
9864
9865 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00009866 SHRINK;
9867
Daniel Veillardfdc91562002-07-01 21:52:03 +00009868 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009869 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9870 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +08009871 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009872 break;
9873 }
9874 }
9875}
9876
9877/**
9878 * xmlParseElement:
9879 * @ctxt: an XML parser context
9880 *
9881 * parse an XML element, this is highly recursive
9882 *
9883 * [39] element ::= EmptyElemTag | STag content ETag
9884 *
9885 * [ WFC: Element Type Match ]
9886 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009887 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009888 *
Owen Taylor3473f882001-02-23 17:55:21 +00009889 */
9890
9891void
9892xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009893 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009894 const xmlChar *prefix = NULL;
9895 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009896 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009897 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009898 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009899 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009900
Daniel Veillard8915c152008-08-26 13:05:34 +00009901 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9902 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9903 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9904 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9905 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08009906 xmlHaltParser(ctxt);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009907 return;
9908 }
9909
Owen Taylor3473f882001-02-23 17:55:21 +00009910 /* Capture start position */
9911 if (ctxt->record_info) {
9912 node_info.begin_pos = ctxt->input->consumed +
9913 (CUR_PTR - ctxt->input->base);
9914 node_info.begin_line = ctxt->input->line;
9915 }
9916
9917 if (ctxt->spaceNr == 0)
9918 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009919 else if (*ctxt->space == -2)
9920 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009921 else
9922 spacePush(ctxt, *ctxt->space);
9923
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009924 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009925#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009926 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009927#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009928 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009929#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009930 else
9931 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009932#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009933 if (ctxt->instate == XML_PARSER_EOF)
9934 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009935 if (name == NULL) {
9936 spacePop(ctxt);
9937 return;
9938 }
9939 namePush(ctxt, name);
9940 ret = ctxt->node;
9941
Daniel Veillard4432df22003-09-28 18:58:27 +00009942#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009943 /*
9944 * [ VC: Root Element Type ]
9945 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009946 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00009947 */
9948 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9949 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9950 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009951#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009952
9953 /*
9954 * Check for an Empty Element.
9955 */
9956 if ((RAW == '/') && (NXT(1) == '>')) {
9957 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009958 if (ctxt->sax2) {
9959 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9960 (!ctxt->disableSAX))
9961 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009962#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009963 } else {
9964 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9965 (!ctxt->disableSAX))
9966 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009967#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009968 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009969 namePop(ctxt);
9970 spacePop(ctxt);
9971 if (nsNr != ctxt->nsNr)
9972 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009973 if ( ret != NULL && ctxt->record_info ) {
9974 node_info.end_pos = ctxt->input->consumed +
9975 (CUR_PTR - ctxt->input->base);
9976 node_info.end_line = ctxt->input->line;
9977 node_info.node = ret;
9978 xmlParserAddNodeInfo(ctxt, &node_info);
9979 }
9980 return;
9981 }
9982 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009983 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009984 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009985 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9986 "Couldn't find end of Start Tag %s line %d\n",
9987 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009988
9989 /*
9990 * end of parsing of this node.
9991 */
9992 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009993 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009994 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009995 if (nsNr != ctxt->nsNr)
9996 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009997
9998 /*
9999 * Capture end position and add node
10000 */
10001 if ( ret != NULL && ctxt->record_info ) {
10002 node_info.end_pos = ctxt->input->consumed +
10003 (CUR_PTR - ctxt->input->base);
10004 node_info.end_line = ctxt->input->line;
10005 node_info.node = ret;
10006 xmlParserAddNodeInfo(ctxt, &node_info);
10007 }
10008 return;
10009 }
10010
10011 /*
10012 * Parse the content of the element:
10013 */
10014 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010015 if (ctxt->instate == XML_PARSER_EOF)
10016 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010017 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010018 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010019 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010020 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010021
10022 /*
10023 * end of parsing of this node.
10024 */
10025 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010026 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010027 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010028 if (nsNr != ctxt->nsNr)
10029 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010030 return;
10031 }
10032
10033 /*
10034 * parse the end of tag: '</' should be here.
10035 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010036 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010037 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010038 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010039 }
10040#ifdef LIBXML_SAX1_ENABLED
10041 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010042 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010043#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010044
10045 /*
10046 * Capture end position and add node
10047 */
10048 if ( ret != NULL && ctxt->record_info ) {
10049 node_info.end_pos = ctxt->input->consumed +
10050 (CUR_PTR - ctxt->input->base);
10051 node_info.end_line = ctxt->input->line;
10052 node_info.node = ret;
10053 xmlParserAddNodeInfo(ctxt, &node_info);
10054 }
10055}
10056
10057/**
10058 * xmlParseVersionNum:
10059 * @ctxt: an XML parser context
10060 *
10061 * parse the XML version value.
10062 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010063 * [26] VersionNum ::= '1.' [0-9]+
10064 *
10065 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010066 *
10067 * Returns the string giving the XML version number, or NULL
10068 */
10069xmlChar *
10070xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10071 xmlChar *buf = NULL;
10072 int len = 0;
10073 int size = 10;
10074 xmlChar cur;
10075
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010076 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010077 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010078 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010079 return(NULL);
10080 }
10081 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010082 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010083 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010084 return(NULL);
10085 }
10086 buf[len++] = cur;
10087 NEXT;
10088 cur=CUR;
10089 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010090 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010091 return(NULL);
10092 }
10093 buf[len++] = cur;
10094 NEXT;
10095 cur=CUR;
10096 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010097 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010098 xmlChar *tmp;
10099
Owen Taylor3473f882001-02-23 17:55:21 +000010100 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010101 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10102 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010103 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010104 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010105 return(NULL);
10106 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010107 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010108 }
10109 buf[len++] = cur;
10110 NEXT;
10111 cur=CUR;
10112 }
10113 buf[len] = 0;
10114 return(buf);
10115}
10116
10117/**
10118 * xmlParseVersionInfo:
10119 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010120 *
Owen Taylor3473f882001-02-23 17:55:21 +000010121 * parse the XML version.
10122 *
10123 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010124 *
Owen Taylor3473f882001-02-23 17:55:21 +000010125 * [25] Eq ::= S? '=' S?
10126 *
10127 * Returns the version string, e.g. "1.0"
10128 */
10129
10130xmlChar *
10131xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10132 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010133
Daniel Veillarda07050d2003-10-19 14:46:32 +000010134 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010135 SKIP(7);
10136 SKIP_BLANKS;
10137 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010138 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010139 return(NULL);
10140 }
10141 NEXT;
10142 SKIP_BLANKS;
10143 if (RAW == '"') {
10144 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010145 version = xmlParseVersionNum(ctxt);
10146 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010147 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010148 } else
10149 NEXT;
10150 } else if (RAW == '\''){
10151 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010152 version = xmlParseVersionNum(ctxt);
10153 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010154 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010155 } else
10156 NEXT;
10157 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010158 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010159 }
10160 }
10161 return(version);
10162}
10163
10164/**
10165 * xmlParseEncName:
10166 * @ctxt: an XML parser context
10167 *
10168 * parse the XML encoding name
10169 *
10170 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10171 *
10172 * Returns the encoding name value or NULL
10173 */
10174xmlChar *
10175xmlParseEncName(xmlParserCtxtPtr ctxt) {
10176 xmlChar *buf = NULL;
10177 int len = 0;
10178 int size = 10;
10179 xmlChar cur;
10180
10181 cur = CUR;
10182 if (((cur >= 'a') && (cur <= 'z')) ||
10183 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010184 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010185 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010186 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010187 return(NULL);
10188 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010189
Owen Taylor3473f882001-02-23 17:55:21 +000010190 buf[len++] = cur;
10191 NEXT;
10192 cur = CUR;
10193 while (((cur >= 'a') && (cur <= 'z')) ||
10194 ((cur >= 'A') && (cur <= 'Z')) ||
10195 ((cur >= '0') && (cur <= '9')) ||
10196 (cur == '.') || (cur == '_') ||
10197 (cur == '-')) {
10198 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010199 xmlChar *tmp;
10200
Owen Taylor3473f882001-02-23 17:55:21 +000010201 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010202 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10203 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010204 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010205 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010206 return(NULL);
10207 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010208 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010209 }
10210 buf[len++] = cur;
10211 NEXT;
10212 cur = CUR;
10213 if (cur == 0) {
10214 SHRINK;
10215 GROW;
10216 cur = CUR;
10217 }
10218 }
10219 buf[len] = 0;
10220 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010221 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010222 }
10223 return(buf);
10224}
10225
10226/**
10227 * xmlParseEncodingDecl:
10228 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010229 *
Owen Taylor3473f882001-02-23 17:55:21 +000010230 * parse the XML encoding declaration
10231 *
10232 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10233 *
10234 * this setups the conversion filters.
10235 *
10236 * Returns the encoding value or NULL
10237 */
10238
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010239const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010240xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10241 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010242
10243 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010244 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010245 SKIP(8);
10246 SKIP_BLANKS;
10247 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010248 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010249 return(NULL);
10250 }
10251 NEXT;
10252 SKIP_BLANKS;
10253 if (RAW == '"') {
10254 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010255 encoding = xmlParseEncName(ctxt);
10256 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010257 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010258 xmlFree((xmlChar *) encoding);
10259 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010260 } else
10261 NEXT;
10262 } else if (RAW == '\''){
10263 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010264 encoding = xmlParseEncName(ctxt);
10265 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010266 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010267 xmlFree((xmlChar *) encoding);
10268 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010269 } else
10270 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010271 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010272 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010273 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010274
10275 /*
10276 * Non standard parsing, allowing the user to ignore encoding
10277 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010278 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10279 xmlFree((xmlChar *) encoding);
10280 return(NULL);
10281 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010282
Daniel Veillard6b621b82003-08-11 15:03:34 +000010283 /*
10284 * UTF-16 encoding stwich has already taken place at this stage,
10285 * more over the little-endian/big-endian selection is already done
10286 */
10287 if ((encoding != NULL) &&
10288 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10289 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010290 /*
10291 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010292 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010293 * document is apparently UTF-8 compatible, then raise an
10294 * encoding mismatch fatal error
10295 */
10296 if ((ctxt->encoding == NULL) &&
10297 (ctxt->input->buf != NULL) &&
10298 (ctxt->input->buf->encoder == NULL)) {
10299 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10300 "Document labelled UTF-16 but has UTF-8 content\n");
10301 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010302 if (ctxt->encoding != NULL)
10303 xmlFree((xmlChar *) ctxt->encoding);
10304 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010305 }
10306 /*
10307 * UTF-8 encoding is handled natively
10308 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010309 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010310 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10311 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010312 if (ctxt->encoding != NULL)
10313 xmlFree((xmlChar *) ctxt->encoding);
10314 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010315 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010316 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010317 xmlCharEncodingHandlerPtr handler;
10318
10319 if (ctxt->input->encoding != NULL)
10320 xmlFree((xmlChar *) ctxt->input->encoding);
10321 ctxt->input->encoding = encoding;
10322
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010323 handler = xmlFindCharEncodingHandler((const char *) encoding);
10324 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010325 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10326 /* failed to convert */
10327 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10328 return(NULL);
10329 }
Owen Taylor3473f882001-02-23 17:55:21 +000010330 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010331 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010332 "Unsupported encoding %s\n", encoding);
10333 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010334 }
10335 }
10336 }
10337 return(encoding);
10338}
10339
10340/**
10341 * xmlParseSDDecl:
10342 * @ctxt: an XML parser context
10343 *
10344 * parse the XML standalone declaration
10345 *
10346 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010347 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010348 *
10349 * [ VC: Standalone Document Declaration ]
10350 * TODO The standalone document declaration must have the value "no"
10351 * if any external markup declarations contain declarations of:
10352 * - attributes with default values, if elements to which these
10353 * attributes apply appear in the document without specifications
10354 * of values for these attributes, or
10355 * - entities (other than amp, lt, gt, apos, quot), if references
10356 * to those entities appear in the document, or
10357 * - attributes with values subject to normalization, where the
10358 * attribute appears in the document with a value which will change
10359 * as a result of normalization, or
10360 * - element types with element content, if white space occurs directly
10361 * within any instance of those types.
10362 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010363 * Returns:
10364 * 1 if standalone="yes"
10365 * 0 if standalone="no"
10366 * -2 if standalone attribute is missing or invalid
10367 * (A standalone value of -2 means that the XML declaration was found,
10368 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010369 */
10370
10371int
10372xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010373 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010374
10375 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010376 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010377 SKIP(10);
10378 SKIP_BLANKS;
10379 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010380 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010381 return(standalone);
10382 }
10383 NEXT;
10384 SKIP_BLANKS;
10385 if (RAW == '\''){
10386 NEXT;
10387 if ((RAW == 'n') && (NXT(1) == 'o')) {
10388 standalone = 0;
10389 SKIP(2);
10390 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10391 (NXT(2) == 's')) {
10392 standalone = 1;
10393 SKIP(3);
10394 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010395 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010396 }
10397 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010398 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010399 } else
10400 NEXT;
10401 } else if (RAW == '"'){
10402 NEXT;
10403 if ((RAW == 'n') && (NXT(1) == 'o')) {
10404 standalone = 0;
10405 SKIP(2);
10406 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10407 (NXT(2) == 's')) {
10408 standalone = 1;
10409 SKIP(3);
10410 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010411 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010412 }
10413 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010414 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010415 } else
10416 NEXT;
10417 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010418 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010419 }
10420 }
10421 return(standalone);
10422}
10423
10424/**
10425 * xmlParseXMLDecl:
10426 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010427 *
Owen Taylor3473f882001-02-23 17:55:21 +000010428 * parse an XML declaration header
10429 *
10430 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10431 */
10432
10433void
10434xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10435 xmlChar *version;
10436
10437 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010438 * This value for standalone indicates that the document has an
10439 * XML declaration but it does not have a standalone attribute.
10440 * It will be overwritten later if a standalone attribute is found.
10441 */
10442 ctxt->input->standalone = -2;
10443
10444 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010445 * We know that '<?xml' is here.
10446 */
10447 SKIP(5);
10448
William M. Brack76e95df2003-10-18 16:20:14 +000010449 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010450 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10451 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010452 }
10453 SKIP_BLANKS;
10454
10455 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010456 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010457 */
10458 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010459 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010460 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010461 } else {
10462 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10463 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010464 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010465 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010466 if (ctxt->options & XML_PARSE_OLD10) {
10467 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10468 "Unsupported version '%s'\n",
10469 version);
10470 } else {
10471 if ((version[0] == '1') && ((version[1] == '.'))) {
10472 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10473 "Unsupported version '%s'\n",
10474 version, NULL);
10475 } else {
10476 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10477 "Unsupported version '%s'\n",
10478 version);
10479 }
10480 }
Daniel Veillard19840942001-11-29 16:11:38 +000010481 }
10482 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010483 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010484 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010485 }
Owen Taylor3473f882001-02-23 17:55:21 +000010486
10487 /*
10488 * We may have the encoding declaration
10489 */
William M. Brack76e95df2003-10-18 16:20:14 +000010490 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010491 if ((RAW == '?') && (NXT(1) == '>')) {
10492 SKIP(2);
10493 return;
10494 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010495 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010496 }
10497 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010498 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10499 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010500 /*
10501 * The XML REC instructs us to stop parsing right here
10502 */
10503 return;
10504 }
10505
10506 /*
10507 * We may have the standalone status.
10508 */
William M. Brack76e95df2003-10-18 16:20:14 +000010509 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010510 if ((RAW == '?') && (NXT(1) == '>')) {
10511 SKIP(2);
10512 return;
10513 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010514 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010515 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010516
10517 /*
10518 * We can grow the input buffer freely at that point
10519 */
10520 GROW;
10521
Owen Taylor3473f882001-02-23 17:55:21 +000010522 SKIP_BLANKS;
10523 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10524
10525 SKIP_BLANKS;
10526 if ((RAW == '?') && (NXT(1) == '>')) {
10527 SKIP(2);
10528 } else if (RAW == '>') {
10529 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010530 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010531 NEXT;
10532 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010533 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010534 MOVETO_ENDTAG(CUR_PTR);
10535 NEXT;
10536 }
10537}
10538
10539/**
10540 * xmlParseMisc:
10541 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010542 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010543 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010544 *
10545 * [27] Misc ::= Comment | PI | S
10546 */
10547
10548void
10549xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010550 while ((ctxt->instate != XML_PARSER_EOF) &&
10551 (((RAW == '<') && (NXT(1) == '?')) ||
10552 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10553 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010554 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010555 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010556 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010557 NEXT;
10558 } else
10559 xmlParseComment(ctxt);
10560 }
10561}
10562
10563/**
10564 * xmlParseDocument:
10565 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010566 *
Owen Taylor3473f882001-02-23 17:55:21 +000010567 * parse an XML document (and build a tree if using the standard SAX
10568 * interface).
10569 *
10570 * [1] document ::= prolog element Misc*
10571 *
10572 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10573 *
10574 * Returns 0, -1 in case of error. the parser context is augmented
10575 * as a result of the parsing.
10576 */
10577
10578int
10579xmlParseDocument(xmlParserCtxtPtr ctxt) {
10580 xmlChar start[4];
10581 xmlCharEncoding enc;
10582
10583 xmlInitParser();
10584
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010585 if ((ctxt == NULL) || (ctxt->input == NULL))
10586 return(-1);
10587
Owen Taylor3473f882001-02-23 17:55:21 +000010588 GROW;
10589
10590 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010591 * SAX: detecting the level.
10592 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010593 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010594
10595 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010596 * SAX: beginning of the document processing.
10597 */
10598 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10599 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010600 if (ctxt->instate == XML_PARSER_EOF)
10601 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010602
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010603 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010604 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010605 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010606 * Get the 4 first bytes and decode the charset
10607 * if enc != XML_CHAR_ENCODING_NONE
10608 * plug some encoding conversion routines.
10609 */
10610 start[0] = RAW;
10611 start[1] = NXT(1);
10612 start[2] = NXT(2);
10613 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010614 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010615 if (enc != XML_CHAR_ENCODING_NONE) {
10616 xmlSwitchEncoding(ctxt, enc);
10617 }
Owen Taylor3473f882001-02-23 17:55:21 +000010618 }
10619
10620
10621 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010622 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010623 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010624 }
10625
10626 /*
10627 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010628 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010629 * than just the first line, unless the amount of data is really
10630 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010631 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010632 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10633 GROW;
10634 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010635 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010636
10637 /*
10638 * Note that we will switch encoding on the fly.
10639 */
10640 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010641 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10642 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010643 /*
10644 * The XML REC instructs us to stop parsing right here
10645 */
10646 return(-1);
10647 }
10648 ctxt->standalone = ctxt->input->standalone;
10649 SKIP_BLANKS;
10650 } else {
10651 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10652 }
10653 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10654 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010655 if (ctxt->instate == XML_PARSER_EOF)
10656 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010657 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10658 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10659 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10660 }
Owen Taylor3473f882001-02-23 17:55:21 +000010661
10662 /*
10663 * The Misc part of the Prolog
10664 */
10665 GROW;
10666 xmlParseMisc(ctxt);
10667
10668 /*
10669 * Then possibly doc type declaration(s) and more Misc
10670 * (doctypedecl Misc*)?
10671 */
10672 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010673 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010674
10675 ctxt->inSubset = 1;
10676 xmlParseDocTypeDecl(ctxt);
10677 if (RAW == '[') {
10678 ctxt->instate = XML_PARSER_DTD;
10679 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010680 if (ctxt->instate == XML_PARSER_EOF)
10681 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010682 }
10683
10684 /*
10685 * Create and update the external subset.
10686 */
10687 ctxt->inSubset = 2;
10688 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10689 (!ctxt->disableSAX))
10690 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10691 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010692 if (ctxt->instate == XML_PARSER_EOF)
10693 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010694 ctxt->inSubset = 0;
10695
Daniel Veillardac4118d2008-01-11 05:27:32 +000010696 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010697
10698 ctxt->instate = XML_PARSER_PROLOG;
10699 xmlParseMisc(ctxt);
10700 }
10701
10702 /*
10703 * Time to start parsing the tree itself
10704 */
10705 GROW;
10706 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010707 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10708 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010709 } else {
10710 ctxt->instate = XML_PARSER_CONTENT;
10711 xmlParseElement(ctxt);
10712 ctxt->instate = XML_PARSER_EPILOG;
10713
10714
10715 /*
10716 * The Misc part at the end
10717 */
10718 xmlParseMisc(ctxt);
10719
Daniel Veillard561b7f82002-03-20 21:55:57 +000010720 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010721 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010722 }
10723 ctxt->instate = XML_PARSER_EOF;
10724 }
10725
10726 /*
10727 * SAX: end of the document processing.
10728 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010729 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010730 ctxt->sax->endDocument(ctxt->userData);
10731
Daniel Veillard5997aca2002-03-18 18:36:20 +000010732 /*
10733 * Remove locally kept entity definitions if the tree was not built
10734 */
10735 if ((ctxt->myDoc != NULL) &&
10736 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10737 xmlFreeDoc(ctxt->myDoc);
10738 ctxt->myDoc = NULL;
10739 }
10740
Daniel Veillardae0765b2008-07-31 19:54:59 +000010741 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10742 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10743 if (ctxt->valid)
10744 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10745 if (ctxt->nsWellFormed)
10746 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10747 if (ctxt->options & XML_PARSE_OLD10)
10748 ctxt->myDoc->properties |= XML_DOC_OLD10;
10749 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010750 if (! ctxt->wellFormed) {
10751 ctxt->valid = 0;
10752 return(-1);
10753 }
Owen Taylor3473f882001-02-23 17:55:21 +000010754 return(0);
10755}
10756
10757/**
10758 * xmlParseExtParsedEnt:
10759 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010760 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010761 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010762 * An external general parsed entity is well-formed if it matches the
10763 * production labeled extParsedEnt.
10764 *
10765 * [78] extParsedEnt ::= TextDecl? content
10766 *
10767 * Returns 0, -1 in case of error. the parser context is augmented
10768 * as a result of the parsing.
10769 */
10770
10771int
10772xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10773 xmlChar start[4];
10774 xmlCharEncoding enc;
10775
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010776 if ((ctxt == NULL) || (ctxt->input == NULL))
10777 return(-1);
10778
Owen Taylor3473f882001-02-23 17:55:21 +000010779 xmlDefaultSAXHandlerInit();
10780
Daniel Veillard309f81d2003-09-23 09:02:53 +000010781 xmlDetectSAX2(ctxt);
10782
Owen Taylor3473f882001-02-23 17:55:21 +000010783 GROW;
10784
10785 /*
10786 * SAX: beginning of the document processing.
10787 */
10788 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10789 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10790
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010791 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010792 * Get the 4 first bytes and decode the charset
10793 * if enc != XML_CHAR_ENCODING_NONE
10794 * plug some encoding conversion routines.
10795 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010796 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10797 start[0] = RAW;
10798 start[1] = NXT(1);
10799 start[2] = NXT(2);
10800 start[3] = NXT(3);
10801 enc = xmlDetectCharEncoding(start, 4);
10802 if (enc != XML_CHAR_ENCODING_NONE) {
10803 xmlSwitchEncoding(ctxt, enc);
10804 }
Owen Taylor3473f882001-02-23 17:55:21 +000010805 }
10806
10807
10808 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010809 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010810 }
10811
10812 /*
10813 * Check for the XMLDecl in the Prolog.
10814 */
10815 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010816 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010817
10818 /*
10819 * Note that we will switch encoding on the fly.
10820 */
10821 xmlParseXMLDecl(ctxt);
10822 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10823 /*
10824 * The XML REC instructs us to stop parsing right here
10825 */
10826 return(-1);
10827 }
10828 SKIP_BLANKS;
10829 } else {
10830 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10831 }
10832 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10833 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010834 if (ctxt->instate == XML_PARSER_EOF)
10835 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010836
10837 /*
10838 * Doing validity checking on chunk doesn't make sense
10839 */
10840 ctxt->instate = XML_PARSER_CONTENT;
10841 ctxt->validate = 0;
10842 ctxt->loadsubset = 0;
10843 ctxt->depth = 0;
10844
10845 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010846 if (ctxt->instate == XML_PARSER_EOF)
10847 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010848
Owen Taylor3473f882001-02-23 17:55:21 +000010849 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010850 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010851 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010852 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010853 }
10854
10855 /*
10856 * SAX: end of the document processing.
10857 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010858 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010859 ctxt->sax->endDocument(ctxt->userData);
10860
10861 if (! ctxt->wellFormed) return(-1);
10862 return(0);
10863}
10864
Daniel Veillard73b013f2003-09-30 12:36:01 +000010865#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010866/************************************************************************
10867 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010868 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010869 * *
10870 ************************************************************************/
10871
10872/**
10873 * xmlParseLookupSequence:
10874 * @ctxt: an XML parser context
10875 * @first: the first char to lookup
10876 * @next: the next char to lookup or zero
10877 * @third: the next char to lookup or zero
10878 *
10879 * Try to find if a sequence (first, next, third) or just (first next) or
10880 * (first) is available in the input stream.
10881 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10882 * to avoid rescanning sequences of bytes, it DOES change the state of the
10883 * parser, do not use liberally.
10884 *
10885 * Returns the index to the current parsing point if the full sequence
10886 * is available, -1 otherwise.
10887 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010888static int
Owen Taylor3473f882001-02-23 17:55:21 +000010889xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10890 xmlChar next, xmlChar third) {
10891 int base, len;
10892 xmlParserInputPtr in;
10893 const xmlChar *buf;
10894
10895 in = ctxt->input;
10896 if (in == NULL) return(-1);
10897 base = in->cur - in->base;
10898 if (base < 0) return(-1);
10899 if (ctxt->checkIndex > base)
10900 base = ctxt->checkIndex;
10901 if (in->buf == NULL) {
10902 buf = in->base;
10903 len = in->length;
10904 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010905 buf = xmlBufContent(in->buf->buffer);
10906 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010907 }
10908 /* take into account the sequence length */
10909 if (third) len -= 2;
10910 else if (next) len --;
10911 for (;base < len;base++) {
10912 if (buf[base] == first) {
10913 if (third != 0) {
10914 if ((buf[base + 1] != next) ||
10915 (buf[base + 2] != third)) continue;
10916 } else if (next != 0) {
10917 if (buf[base + 1] != next) continue;
10918 }
10919 ctxt->checkIndex = 0;
10920#ifdef DEBUG_PUSH
10921 if (next == 0)
10922 xmlGenericError(xmlGenericErrorContext,
10923 "PP: lookup '%c' found at %d\n",
10924 first, base);
10925 else if (third == 0)
10926 xmlGenericError(xmlGenericErrorContext,
10927 "PP: lookup '%c%c' found at %d\n",
10928 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010929 else
Owen Taylor3473f882001-02-23 17:55:21 +000010930 xmlGenericError(xmlGenericErrorContext,
10931 "PP: lookup '%c%c%c' found at %d\n",
10932 first, next, third, base);
10933#endif
10934 return(base - (in->cur - in->base));
10935 }
10936 }
10937 ctxt->checkIndex = base;
10938#ifdef DEBUG_PUSH
10939 if (next == 0)
10940 xmlGenericError(xmlGenericErrorContext,
10941 "PP: lookup '%c' failed\n", first);
10942 else if (third == 0)
10943 xmlGenericError(xmlGenericErrorContext,
10944 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010945 else
Owen Taylor3473f882001-02-23 17:55:21 +000010946 xmlGenericError(xmlGenericErrorContext,
10947 "PP: lookup '%c%c%c' failed\n", first, next, third);
10948#endif
10949 return(-1);
10950}
10951
10952/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010953 * xmlParseGetLasts:
10954 * @ctxt: an XML parser context
10955 * @lastlt: pointer to store the last '<' from the input
10956 * @lastgt: pointer to store the last '>' from the input
10957 *
10958 * Lookup the last < and > in the current chunk
10959 */
10960static void
10961xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10962 const xmlChar **lastgt) {
10963 const xmlChar *tmp;
10964
10965 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10966 xmlGenericError(xmlGenericErrorContext,
10967 "Internal error: xmlParseGetLasts\n");
10968 return;
10969 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010970 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010971 tmp = ctxt->input->end;
10972 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010973 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010974 if (tmp < ctxt->input->base) {
10975 *lastlt = NULL;
10976 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010977 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010978 *lastlt = tmp;
10979 tmp++;
10980 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10981 if (*tmp == '\'') {
10982 tmp++;
10983 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10984 if (tmp < ctxt->input->end) tmp++;
10985 } else if (*tmp == '"') {
10986 tmp++;
10987 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10988 if (tmp < ctxt->input->end) tmp++;
10989 } else
10990 tmp++;
10991 }
10992 if (tmp < ctxt->input->end)
10993 *lastgt = tmp;
10994 else {
10995 tmp = *lastlt;
10996 tmp--;
10997 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10998 if (tmp >= ctxt->input->base)
10999 *lastgt = tmp;
11000 else
11001 *lastgt = NULL;
11002 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011003 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011004 } else {
11005 *lastlt = NULL;
11006 *lastgt = NULL;
11007 }
11008}
11009/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011010 * xmlCheckCdataPush:
David Kilzer4f8606c2016-01-05 13:38:09 -080011011 * @cur: pointer to the block of characters
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011012 * @len: length of the block in bytes
David Kilzer4f8606c2016-01-05 13:38:09 -080011013 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011014 *
11015 * Check that the block of characters is okay as SCdata content [20]
11016 *
11017 * Returns the number of bytes to pass if okay, a negative index where an
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +020011018 * UTF-8 error occurred otherwise
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011019 */
11020static int
David Kilzer4f8606c2016-01-05 13:38:09 -080011021xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011022 int ix;
11023 unsigned char c;
11024 int codepoint;
11025
11026 if ((utf == NULL) || (len <= 0))
11027 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011028
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011029 for (ix = 0; ix < len;) { /* string is 0-terminated */
11030 c = utf[ix];
11031 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11032 if (c >= 0x20)
11033 ix++;
11034 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11035 ix++;
11036 else
11037 return(-ix);
11038 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011039 if (ix + 2 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011040 if ((utf[ix+1] & 0xc0 ) != 0x80)
11041 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011042 codepoint = (utf[ix] & 0x1f) << 6;
11043 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011044 if (!xmlIsCharQ(codepoint))
11045 return(-ix);
11046 ix += 2;
11047 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011048 if (ix + 3 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011049 if (((utf[ix+1] & 0xc0) != 0x80) ||
11050 ((utf[ix+2] & 0xc0) != 0x80))
11051 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011052 codepoint = (utf[ix] & 0xf) << 12;
11053 codepoint |= (utf[ix+1] & 0x3f) << 6;
11054 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011055 if (!xmlIsCharQ(codepoint))
11056 return(-ix);
11057 ix += 3;
11058 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011059 if (ix + 4 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011060 if (((utf[ix+1] & 0xc0) != 0x80) ||
11061 ((utf[ix+2] & 0xc0) != 0x80) ||
11062 ((utf[ix+3] & 0xc0) != 0x80))
11063 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011064 codepoint = (utf[ix] & 0x7) << 18;
11065 codepoint |= (utf[ix+1] & 0x3f) << 12;
11066 codepoint |= (utf[ix+2] & 0x3f) << 6;
11067 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011068 if (!xmlIsCharQ(codepoint))
11069 return(-ix);
11070 ix += 4;
11071 } else /* unknown encoding */
11072 return(-ix);
11073 }
11074 return(ix);
11075}
11076
11077/**
Owen Taylor3473f882001-02-23 17:55:21 +000011078 * xmlParseTryOrFinish:
11079 * @ctxt: an XML parser context
11080 * @terminate: last chunk indicator
11081 *
11082 * Try to progress on parsing
11083 *
11084 * Returns zero if no parsing was possible
11085 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011086static int
Owen Taylor3473f882001-02-23 17:55:21 +000011087xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11088 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011089 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011090 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011091 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011092
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011093 if (ctxt->input == NULL)
11094 return(0);
11095
Owen Taylor3473f882001-02-23 17:55:21 +000011096#ifdef DEBUG_PUSH
11097 switch (ctxt->instate) {
11098 case XML_PARSER_EOF:
11099 xmlGenericError(xmlGenericErrorContext,
11100 "PP: try EOF\n"); break;
11101 case XML_PARSER_START:
11102 xmlGenericError(xmlGenericErrorContext,
11103 "PP: try START\n"); break;
11104 case XML_PARSER_MISC:
11105 xmlGenericError(xmlGenericErrorContext,
11106 "PP: try MISC\n");break;
11107 case XML_PARSER_COMMENT:
11108 xmlGenericError(xmlGenericErrorContext,
11109 "PP: try COMMENT\n");break;
11110 case XML_PARSER_PROLOG:
11111 xmlGenericError(xmlGenericErrorContext,
11112 "PP: try PROLOG\n");break;
11113 case XML_PARSER_START_TAG:
11114 xmlGenericError(xmlGenericErrorContext,
11115 "PP: try START_TAG\n");break;
11116 case XML_PARSER_CONTENT:
11117 xmlGenericError(xmlGenericErrorContext,
11118 "PP: try CONTENT\n");break;
11119 case XML_PARSER_CDATA_SECTION:
11120 xmlGenericError(xmlGenericErrorContext,
11121 "PP: try CDATA_SECTION\n");break;
11122 case XML_PARSER_END_TAG:
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: try END_TAG\n");break;
11125 case XML_PARSER_ENTITY_DECL:
11126 xmlGenericError(xmlGenericErrorContext,
11127 "PP: try ENTITY_DECL\n");break;
11128 case XML_PARSER_ENTITY_VALUE:
11129 xmlGenericError(xmlGenericErrorContext,
11130 "PP: try ENTITY_VALUE\n");break;
11131 case XML_PARSER_ATTRIBUTE_VALUE:
11132 xmlGenericError(xmlGenericErrorContext,
11133 "PP: try ATTRIBUTE_VALUE\n");break;
11134 case XML_PARSER_DTD:
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: try DTD\n");break;
11137 case XML_PARSER_EPILOG:
11138 xmlGenericError(xmlGenericErrorContext,
11139 "PP: try EPILOG\n");break;
11140 case XML_PARSER_PI:
11141 xmlGenericError(xmlGenericErrorContext,
11142 "PP: try PI\n");break;
11143 case XML_PARSER_IGNORE:
11144 xmlGenericError(xmlGenericErrorContext,
11145 "PP: try IGNORE\n");break;
11146 }
11147#endif
11148
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011149 if ((ctxt->input != NULL) &&
11150 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011151 xmlSHRINK(ctxt);
11152 ctxt->checkIndex = 0;
11153 }
11154 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011155
Daniel Veillarde50ba812013-04-11 15:54:51 +080011156 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011157 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011158 return(0);
11159
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011160 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011161 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011162 avail = ctxt->input->length -
11163 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011164 else {
11165 /*
11166 * If we are operating on converted input, try to flush
11167 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011168 * buffer. But do not do this in document start where
11169 * encoding="..." may not have been read and we work on a
11170 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011171 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011172 if ((ctxt->instate != XML_PARSER_START) &&
11173 (ctxt->input->buf->raw != NULL) &&
11174 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011175 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11176 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011177 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011178
11179 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011180 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11181 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011182 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011183 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011184 (ctxt->input->cur - ctxt->input->base);
11185 }
Owen Taylor3473f882001-02-23 17:55:21 +000011186 if (avail < 1)
11187 goto done;
11188 switch (ctxt->instate) {
11189 case XML_PARSER_EOF:
11190 /*
11191 * Document parsing is done !
11192 */
11193 goto done;
11194 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011195 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11196 xmlChar start[4];
11197 xmlCharEncoding enc;
11198
11199 /*
11200 * Very first chars read from the document flow.
11201 */
11202 if (avail < 4)
11203 goto done;
11204
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011205 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011206 * Get the 4 first bytes and decode the charset
11207 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011208 * plug some encoding conversion routines,
11209 * else xmlSwitchEncoding will set to (default)
11210 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011211 */
11212 start[0] = RAW;
11213 start[1] = NXT(1);
11214 start[2] = NXT(2);
11215 start[3] = NXT(3);
11216 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011217 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011218 break;
11219 }
Owen Taylor3473f882001-02-23 17:55:21 +000011220
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011221 if (avail < 2)
11222 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011223 cur = ctxt->input->cur[0];
11224 next = ctxt->input->cur[1];
11225 if (cur == 0) {
11226 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11227 ctxt->sax->setDocumentLocator(ctxt->userData,
11228 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011229 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011230 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011231#ifdef DEBUG_PUSH
11232 xmlGenericError(xmlGenericErrorContext,
11233 "PP: entering EOF\n");
11234#endif
11235 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11236 ctxt->sax->endDocument(ctxt->userData);
11237 goto done;
11238 }
11239 if ((cur == '<') && (next == '?')) {
11240 /* PI or XML decl */
11241 if (avail < 5) return(ret);
11242 if ((!terminate) &&
11243 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11244 return(ret);
11245 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11246 ctxt->sax->setDocumentLocator(ctxt->userData,
11247 &xmlDefaultSAXLocator);
11248 if ((ctxt->input->cur[2] == 'x') &&
11249 (ctxt->input->cur[3] == 'm') &&
11250 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011251 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011252 ret += 5;
11253#ifdef DEBUG_PUSH
11254 xmlGenericError(xmlGenericErrorContext,
11255 "PP: Parsing XML Decl\n");
11256#endif
11257 xmlParseXMLDecl(ctxt);
11258 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11259 /*
11260 * The XML REC instructs us to stop parsing right
11261 * here
11262 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011263 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011264 return(0);
11265 }
11266 ctxt->standalone = ctxt->input->standalone;
11267 if ((ctxt->encoding == NULL) &&
11268 (ctxt->input->encoding != NULL))
11269 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11270 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11271 (!ctxt->disableSAX))
11272 ctxt->sax->startDocument(ctxt->userData);
11273 ctxt->instate = XML_PARSER_MISC;
11274#ifdef DEBUG_PUSH
11275 xmlGenericError(xmlGenericErrorContext,
11276 "PP: entering MISC\n");
11277#endif
11278 } else {
11279 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11280 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11281 (!ctxt->disableSAX))
11282 ctxt->sax->startDocument(ctxt->userData);
11283 ctxt->instate = XML_PARSER_MISC;
11284#ifdef DEBUG_PUSH
11285 xmlGenericError(xmlGenericErrorContext,
11286 "PP: entering MISC\n");
11287#endif
11288 }
11289 } else {
11290 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11291 ctxt->sax->setDocumentLocator(ctxt->userData,
11292 &xmlDefaultSAXLocator);
11293 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011294 if (ctxt->version == NULL) {
11295 xmlErrMemory(ctxt, NULL);
11296 break;
11297 }
Owen Taylor3473f882001-02-23 17:55:21 +000011298 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11299 (!ctxt->disableSAX))
11300 ctxt->sax->startDocument(ctxt->userData);
11301 ctxt->instate = XML_PARSER_MISC;
11302#ifdef DEBUG_PUSH
11303 xmlGenericError(xmlGenericErrorContext,
11304 "PP: entering MISC\n");
11305#endif
11306 }
11307 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011308 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011309 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011310 const xmlChar *prefix = NULL;
11311 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011312 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011313
11314 if ((avail < 2) && (ctxt->inputNr == 1))
11315 goto done;
11316 cur = ctxt->input->cur[0];
11317 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011318 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011319 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011320 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11321 ctxt->sax->endDocument(ctxt->userData);
11322 goto done;
11323 }
11324 if (!terminate) {
11325 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011326 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011327 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011328 goto done;
11329 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11330 goto done;
11331 }
11332 }
11333 if (ctxt->spaceNr == 0)
11334 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011335 else if (*ctxt->space == -2)
11336 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011337 else
11338 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011339#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011340 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011341#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011342 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011343#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011344 else
11345 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011346#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011347 if (ctxt->instate == XML_PARSER_EOF)
11348 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011349 if (name == NULL) {
11350 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011351 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011352 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11353 ctxt->sax->endDocument(ctxt->userData);
11354 goto done;
11355 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011356#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011357 /*
11358 * [ VC: Root Element Type ]
11359 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011360 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011361 */
11362 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11363 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11364 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011365#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011366
11367 /*
11368 * Check for an Empty Element.
11369 */
11370 if ((RAW == '/') && (NXT(1) == '>')) {
11371 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011372
11373 if (ctxt->sax2) {
11374 if ((ctxt->sax != NULL) &&
11375 (ctxt->sax->endElementNs != NULL) &&
11376 (!ctxt->disableSAX))
11377 ctxt->sax->endElementNs(ctxt->userData, name,
11378 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011379 if (ctxt->nsNr - nsNr > 0)
11380 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011381#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011382 } else {
11383 if ((ctxt->sax != NULL) &&
11384 (ctxt->sax->endElement != NULL) &&
11385 (!ctxt->disableSAX))
11386 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011387#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011388 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011389 if (ctxt->instate == XML_PARSER_EOF)
11390 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011391 spacePop(ctxt);
11392 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011393 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011394 } else {
11395 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011396 }
Daniel Veillard65686452012-07-19 18:25:01 +080011397 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011398 break;
11399 }
11400 if (RAW == '>') {
11401 NEXT;
11402 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011403 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011404 "Couldn't find end of Start Tag %s\n",
11405 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011406 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011407 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011408 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011409 if (ctxt->sax2)
11410 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011411#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011412 else
11413 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011414#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011415
Daniel Veillarda880b122003-04-21 21:36:41 +000011416 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011417 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011418 break;
11419 }
11420 case XML_PARSER_CONTENT: {
11421 const xmlChar *test;
11422 unsigned int cons;
11423 if ((avail < 2) && (ctxt->inputNr == 1))
11424 goto done;
11425 cur = ctxt->input->cur[0];
11426 next = ctxt->input->cur[1];
11427
11428 test = CUR_PTR;
11429 cons = ctxt->input->consumed;
11430 if ((cur == '<') && (next == '/')) {
11431 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011432 break;
11433 } else if ((cur == '<') && (next == '?')) {
11434 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011435 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11436 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011437 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011438 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011439 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011440 ctxt->instate = XML_PARSER_CONTENT;
11441 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011442 } else if ((cur == '<') && (next != '!')) {
11443 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011444 break;
11445 } else if ((cur == '<') && (next == '!') &&
11446 (ctxt->input->cur[2] == '-') &&
11447 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011448 int term;
11449
11450 if (avail < 4)
11451 goto done;
11452 ctxt->input->cur += 4;
11453 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11454 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011455 if ((!terminate) && (term < 0)) {
11456 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011457 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011458 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011459 xmlParseComment(ctxt);
11460 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011461 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011462 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11463 (ctxt->input->cur[2] == '[') &&
11464 (ctxt->input->cur[3] == 'C') &&
11465 (ctxt->input->cur[4] == 'D') &&
11466 (ctxt->input->cur[5] == 'A') &&
11467 (ctxt->input->cur[6] == 'T') &&
11468 (ctxt->input->cur[7] == 'A') &&
11469 (ctxt->input->cur[8] == '[')) {
11470 SKIP(9);
11471 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011472 break;
11473 } else if ((cur == '<') && (next == '!') &&
11474 (avail < 9)) {
11475 goto done;
11476 } else if (cur == '&') {
11477 if ((!terminate) &&
11478 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11479 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011480 xmlParseReference(ctxt);
11481 } else {
11482 /* TODO Avoid the extra copy, handle directly !!! */
11483 /*
11484 * Goal of the following test is:
11485 * - minimize calls to the SAX 'character' callback
11486 * when they are mergeable
11487 * - handle a problem for isBlank when we only parse
11488 * a sequence of blank chars and the next one is
11489 * not available to check against '<' presence.
11490 * - tries to homogenize the differences in SAX
11491 * callbacks between the push and pull versions
11492 * of the parser.
11493 */
11494 if ((ctxt->inputNr == 1) &&
11495 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11496 if (!terminate) {
11497 if (ctxt->progressive) {
11498 if ((lastlt == NULL) ||
11499 (ctxt->input->cur > lastlt))
11500 goto done;
11501 } else if (xmlParseLookupSequence(ctxt,
11502 '<', 0, 0) < 0) {
11503 goto done;
11504 }
11505 }
11506 }
11507 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011508 xmlParseCharData(ctxt, 0);
11509 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011510 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011511 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11512 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011513 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011514 break;
11515 }
11516 break;
11517 }
11518 case XML_PARSER_END_TAG:
11519 if (avail < 2)
11520 goto done;
11521 if (!terminate) {
11522 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011523 /* > can be found unescaped in attribute values */
11524 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011525 goto done;
11526 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11527 goto done;
11528 }
11529 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011530 if (ctxt->sax2) {
11531 xmlParseEndTag2(ctxt,
Nick Wellnhoferd422b952017-10-09 13:37:42 +020011532 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11533 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
11534 (int) (ptrdiff_t)
11535 ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011536 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011537 }
11538#ifdef LIBXML_SAX1_ENABLED
11539 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011540 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011541#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011542 if (ctxt->instate == XML_PARSER_EOF) {
11543 /* Nothing */
11544 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011545 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011546 } else {
11547 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011548 }
11549 break;
11550 case XML_PARSER_CDATA_SECTION: {
11551 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011552 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011553 * cdataBlock merge back contiguous callbacks.
11554 */
11555 int base;
11556
11557 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11558 if (base < 0) {
11559 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011560 int tmp;
11561
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011562 tmp = xmlCheckCdataPush(ctxt->input->cur,
David Kilzer4f8606c2016-01-05 13:38:09 -080011563 XML_PARSER_BIG_BUFFER_SIZE, 0);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011564 if (tmp < 0) {
11565 tmp = -tmp;
11566 ctxt->input->cur += tmp;
11567 goto encoding_error;
11568 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011569 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11570 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011571 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011572 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011573 else if (ctxt->sax->characters != NULL)
11574 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011575 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011576 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011577 if (ctxt->instate == XML_PARSER_EOF)
11578 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011579 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011580 ctxt->checkIndex = 0;
11581 }
11582 goto done;
11583 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011584 int tmp;
11585
David Kilzer4f8606c2016-01-05 13:38:09 -080011586 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011587 if ((tmp < 0) || (tmp != base)) {
11588 tmp = -tmp;
11589 ctxt->input->cur += tmp;
11590 goto encoding_error;
11591 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011592 if ((ctxt->sax != NULL) && (base == 0) &&
11593 (ctxt->sax->cdataBlock != NULL) &&
11594 (!ctxt->disableSAX)) {
11595 /*
11596 * Special case to provide identical behaviour
11597 * between pull and push parsers on empty CDATA
11598 * sections
11599 */
11600 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11601 (!strncmp((const char *)&ctxt->input->cur[-9],
11602 "<![CDATA[", 9)))
11603 ctxt->sax->cdataBlock(ctxt->userData,
11604 BAD_CAST "", 0);
11605 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011606 (!ctxt->disableSAX)) {
11607 if (ctxt->sax->cdataBlock != NULL)
11608 ctxt->sax->cdataBlock(ctxt->userData,
11609 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011610 else if (ctxt->sax->characters != NULL)
11611 ctxt->sax->characters(ctxt->userData,
11612 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011613 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011614 if (ctxt->instate == XML_PARSER_EOF)
11615 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011616 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011617 ctxt->checkIndex = 0;
11618 ctxt->instate = XML_PARSER_CONTENT;
11619#ifdef DEBUG_PUSH
11620 xmlGenericError(xmlGenericErrorContext,
11621 "PP: entering CONTENT\n");
11622#endif
11623 }
11624 break;
11625 }
Owen Taylor3473f882001-02-23 17:55:21 +000011626 case XML_PARSER_MISC:
11627 SKIP_BLANKS;
11628 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011629 avail = ctxt->input->length -
11630 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011631 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011632 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011633 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011634 if (avail < 2)
11635 goto done;
11636 cur = ctxt->input->cur[0];
11637 next = ctxt->input->cur[1];
11638 if ((cur == '<') && (next == '?')) {
11639 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011640 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11641 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011642 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011643 }
Owen Taylor3473f882001-02-23 17:55:21 +000011644#ifdef DEBUG_PUSH
11645 xmlGenericError(xmlGenericErrorContext,
11646 "PP: Parsing PI\n");
11647#endif
11648 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011649 if (ctxt->instate == XML_PARSER_EOF)
11650 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011651 ctxt->instate = XML_PARSER_MISC;
11652 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011653 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011654 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011655 (ctxt->input->cur[2] == '-') &&
11656 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011657 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011658 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11659 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011660 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011661 }
Owen Taylor3473f882001-02-23 17:55:21 +000011662#ifdef DEBUG_PUSH
11663 xmlGenericError(xmlGenericErrorContext,
11664 "PP: Parsing Comment\n");
11665#endif
11666 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011667 if (ctxt->instate == XML_PARSER_EOF)
11668 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011669 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011670 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011671 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011672 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011673 (ctxt->input->cur[2] == 'D') &&
11674 (ctxt->input->cur[3] == 'O') &&
11675 (ctxt->input->cur[4] == 'C') &&
11676 (ctxt->input->cur[5] == 'T') &&
11677 (ctxt->input->cur[6] == 'Y') &&
11678 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011679 (ctxt->input->cur[8] == 'E')) {
11680 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011681 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11682 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011683 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011684 }
Owen Taylor3473f882001-02-23 17:55:21 +000011685#ifdef DEBUG_PUSH
11686 xmlGenericError(xmlGenericErrorContext,
11687 "PP: Parsing internal subset\n");
11688#endif
11689 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011690 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011691 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011692 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011693 if (ctxt->instate == XML_PARSER_EOF)
11694 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011695 if (RAW == '[') {
11696 ctxt->instate = XML_PARSER_DTD;
11697#ifdef DEBUG_PUSH
11698 xmlGenericError(xmlGenericErrorContext,
11699 "PP: entering DTD\n");
11700#endif
11701 } else {
11702 /*
11703 * Create and update the external subset.
11704 */
11705 ctxt->inSubset = 2;
11706 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11707 (ctxt->sax->externalSubset != NULL))
11708 ctxt->sax->externalSubset(ctxt->userData,
11709 ctxt->intSubName, ctxt->extSubSystem,
11710 ctxt->extSubURI);
11711 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011712 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011713 ctxt->instate = XML_PARSER_PROLOG;
11714#ifdef DEBUG_PUSH
11715 xmlGenericError(xmlGenericErrorContext,
11716 "PP: entering PROLOG\n");
11717#endif
11718 }
11719 } else if ((cur == '<') && (next == '!') &&
11720 (avail < 9)) {
11721 goto done;
11722 } else {
11723 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011724 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011725 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011726#ifdef DEBUG_PUSH
11727 xmlGenericError(xmlGenericErrorContext,
11728 "PP: entering START_TAG\n");
11729#endif
11730 }
11731 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011732 case XML_PARSER_PROLOG:
11733 SKIP_BLANKS;
11734 if (ctxt->input->buf == NULL)
11735 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11736 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011737 avail = xmlBufUse(ctxt->input->buf->buffer) -
11738 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011739 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011740 goto done;
11741 cur = ctxt->input->cur[0];
11742 next = ctxt->input->cur[1];
11743 if ((cur == '<') && (next == '?')) {
11744 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011745 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11746 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011747 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011748 }
Owen Taylor3473f882001-02-23 17:55:21 +000011749#ifdef DEBUG_PUSH
11750 xmlGenericError(xmlGenericErrorContext,
11751 "PP: Parsing PI\n");
11752#endif
11753 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011754 if (ctxt->instate == XML_PARSER_EOF)
11755 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011756 ctxt->instate = XML_PARSER_PROLOG;
11757 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011758 } else if ((cur == '<') && (next == '!') &&
11759 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11760 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011761 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11762 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011763 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011764 }
Owen Taylor3473f882001-02-23 17:55:21 +000011765#ifdef DEBUG_PUSH
11766 xmlGenericError(xmlGenericErrorContext,
11767 "PP: Parsing Comment\n");
11768#endif
11769 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011770 if (ctxt->instate == XML_PARSER_EOF)
11771 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011772 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011773 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011774 } else if ((cur == '<') && (next == '!') &&
11775 (avail < 4)) {
11776 goto done;
11777 } else {
11778 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011779 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011780 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011781 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011782#ifdef DEBUG_PUSH
11783 xmlGenericError(xmlGenericErrorContext,
11784 "PP: entering START_TAG\n");
11785#endif
11786 }
11787 break;
11788 case XML_PARSER_EPILOG:
11789 SKIP_BLANKS;
11790 if (ctxt->input->buf == NULL)
11791 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11792 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011793 avail = xmlBufUse(ctxt->input->buf->buffer) -
11794 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011795 if (avail < 2)
11796 goto done;
11797 cur = ctxt->input->cur[0];
11798 next = ctxt->input->cur[1];
11799 if ((cur == '<') && (next == '?')) {
11800 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011801 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11802 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011803 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011804 }
Owen Taylor3473f882001-02-23 17:55:21 +000011805#ifdef DEBUG_PUSH
11806 xmlGenericError(xmlGenericErrorContext,
11807 "PP: Parsing PI\n");
11808#endif
11809 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011810 if (ctxt->instate == XML_PARSER_EOF)
11811 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011812 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011813 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011814 } else if ((cur == '<') && (next == '!') &&
11815 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11816 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011817 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11818 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011819 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011820 }
Owen Taylor3473f882001-02-23 17:55:21 +000011821#ifdef DEBUG_PUSH
11822 xmlGenericError(xmlGenericErrorContext,
11823 "PP: Parsing Comment\n");
11824#endif
11825 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011826 if (ctxt->instate == XML_PARSER_EOF)
11827 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011828 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011829 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011830 } else if ((cur == '<') && (next == '!') &&
11831 (avail < 4)) {
11832 goto done;
11833 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011834 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011835 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011836#ifdef DEBUG_PUSH
11837 xmlGenericError(xmlGenericErrorContext,
11838 "PP: entering EOF\n");
11839#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011840 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011841 ctxt->sax->endDocument(ctxt->userData);
11842 goto done;
11843 }
11844 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011845 case XML_PARSER_DTD: {
11846 /*
11847 * Sorry but progressive parsing of the internal subset
11848 * is not expected to be supported. We first check that
11849 * the full content of the internal subset is available and
11850 * the parsing is launched only at that point.
11851 * Internal subset ends up with "']' S? '>'" in an unescaped
11852 * section and not in a ']]>' sequence which are conditional
11853 * sections (whoever argued to keep that crap in XML deserve
11854 * a place in hell !).
11855 */
11856 int base, i;
11857 xmlChar *buf;
11858 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011859 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011860
11861 base = ctxt->input->cur - ctxt->input->base;
11862 if (base < 0) return(0);
11863 if (ctxt->checkIndex > base)
11864 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011865 buf = xmlBufContent(ctxt->input->buf->buffer);
11866 use = xmlBufUse(ctxt->input->buf->buffer);
11867 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011868 if (quote != 0) {
11869 if (buf[base] == quote)
11870 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011871 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011872 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011873 if ((quote == 0) && (buf[base] == '<')) {
11874 int found = 0;
11875 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011876 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011877 (buf[base + 1] == '!') &&
11878 (buf[base + 2] == '-') &&
11879 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011880 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011881 if ((buf[base] == '-') &&
11882 (buf[base + 1] == '-') &&
11883 (buf[base + 2] == '>')) {
11884 found = 1;
11885 base += 2;
11886 break;
11887 }
11888 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011889 if (!found) {
11890#if 0
11891 fprintf(stderr, "unfinished comment\n");
11892#endif
11893 break; /* for */
11894 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011895 continue;
11896 }
11897 }
Owen Taylor3473f882001-02-23 17:55:21 +000011898 if (buf[base] == '"') {
11899 quote = '"';
11900 continue;
11901 }
11902 if (buf[base] == '\'') {
11903 quote = '\'';
11904 continue;
11905 }
11906 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011907#if 0
11908 fprintf(stderr, "%c%c%c%c: ", buf[base],
11909 buf[base + 1], buf[base + 2], buf[base + 3]);
11910#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011911 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011912 break;
11913 if (buf[base + 1] == ']') {
11914 /* conditional crap, skip both ']' ! */
11915 base++;
11916 continue;
11917 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011918 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011919 if (buf[base + i] == '>') {
11920#if 0
11921 fprintf(stderr, "found\n");
11922#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011923 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011924 }
11925 if (!IS_BLANK_CH(buf[base + i])) {
11926#if 0
11927 fprintf(stderr, "not found\n");
11928#endif
11929 goto not_end_of_int_subset;
11930 }
Owen Taylor3473f882001-02-23 17:55:21 +000011931 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011932#if 0
11933 fprintf(stderr, "end of stream\n");
11934#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011935 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011936
Owen Taylor3473f882001-02-23 17:55:21 +000011937 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011938not_end_of_int_subset:
11939 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011940 }
11941 /*
11942 * We didn't found the end of the Internal subset
11943 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011944 if (quote == 0)
11945 ctxt->checkIndex = base;
11946 else
11947 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011948#ifdef DEBUG_PUSH
11949 if (next == 0)
11950 xmlGenericError(xmlGenericErrorContext,
11951 "PP: lookup of int subset end filed\n");
11952#endif
11953 goto done;
11954
11955found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011956 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011957 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011958 if (ctxt->instate == XML_PARSER_EOF)
11959 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011960 ctxt->inSubset = 2;
11961 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11962 (ctxt->sax->externalSubset != NULL))
11963 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11964 ctxt->extSubSystem, ctxt->extSubURI);
11965 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011966 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011967 if (ctxt->instate == XML_PARSER_EOF)
11968 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011969 ctxt->instate = XML_PARSER_PROLOG;
11970 ctxt->checkIndex = 0;
11971#ifdef DEBUG_PUSH
11972 xmlGenericError(xmlGenericErrorContext,
11973 "PP: entering PROLOG\n");
11974#endif
11975 break;
11976 }
11977 case XML_PARSER_COMMENT:
11978 xmlGenericError(xmlGenericErrorContext,
11979 "PP: internal error, state == COMMENT\n");
11980 ctxt->instate = XML_PARSER_CONTENT;
11981#ifdef DEBUG_PUSH
11982 xmlGenericError(xmlGenericErrorContext,
11983 "PP: entering CONTENT\n");
11984#endif
11985 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011986 case XML_PARSER_IGNORE:
11987 xmlGenericError(xmlGenericErrorContext,
11988 "PP: internal error, state == IGNORE");
11989 ctxt->instate = XML_PARSER_DTD;
11990#ifdef DEBUG_PUSH
11991 xmlGenericError(xmlGenericErrorContext,
11992 "PP: entering DTD\n");
11993#endif
11994 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011995 case XML_PARSER_PI:
11996 xmlGenericError(xmlGenericErrorContext,
11997 "PP: internal error, state == PI\n");
11998 ctxt->instate = XML_PARSER_CONTENT;
11999#ifdef DEBUG_PUSH
12000 xmlGenericError(xmlGenericErrorContext,
12001 "PP: entering CONTENT\n");
12002#endif
12003 break;
12004 case XML_PARSER_ENTITY_DECL:
12005 xmlGenericError(xmlGenericErrorContext,
12006 "PP: internal error, state == ENTITY_DECL\n");
12007 ctxt->instate = XML_PARSER_DTD;
12008#ifdef DEBUG_PUSH
12009 xmlGenericError(xmlGenericErrorContext,
12010 "PP: entering DTD\n");
12011#endif
12012 break;
12013 case XML_PARSER_ENTITY_VALUE:
12014 xmlGenericError(xmlGenericErrorContext,
12015 "PP: internal error, state == ENTITY_VALUE\n");
12016 ctxt->instate = XML_PARSER_CONTENT;
12017#ifdef DEBUG_PUSH
12018 xmlGenericError(xmlGenericErrorContext,
12019 "PP: entering DTD\n");
12020#endif
12021 break;
12022 case XML_PARSER_ATTRIBUTE_VALUE:
12023 xmlGenericError(xmlGenericErrorContext,
12024 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12025 ctxt->instate = XML_PARSER_START_TAG;
12026#ifdef DEBUG_PUSH
12027 xmlGenericError(xmlGenericErrorContext,
12028 "PP: entering START_TAG\n");
12029#endif
12030 break;
12031 case XML_PARSER_SYSTEM_LITERAL:
12032 xmlGenericError(xmlGenericErrorContext,
12033 "PP: internal error, state == SYSTEM_LITERAL\n");
12034 ctxt->instate = XML_PARSER_START_TAG;
12035#ifdef DEBUG_PUSH
12036 xmlGenericError(xmlGenericErrorContext,
12037 "PP: entering START_TAG\n");
12038#endif
12039 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012040 case XML_PARSER_PUBLIC_LITERAL:
12041 xmlGenericError(xmlGenericErrorContext,
12042 "PP: internal error, state == PUBLIC_LITERAL\n");
12043 ctxt->instate = XML_PARSER_START_TAG;
12044#ifdef DEBUG_PUSH
12045 xmlGenericError(xmlGenericErrorContext,
12046 "PP: entering START_TAG\n");
12047#endif
12048 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012049 }
12050 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012051done:
Owen Taylor3473f882001-02-23 17:55:21 +000012052#ifdef DEBUG_PUSH
12053 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12054#endif
12055 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012056encoding_error:
12057 {
12058 char buffer[150];
12059
12060 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12061 ctxt->input->cur[0], ctxt->input->cur[1],
12062 ctxt->input->cur[2], ctxt->input->cur[3]);
12063 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12064 "Input is not proper UTF-8, indicate encoding !\n%s",
12065 BAD_CAST buffer, NULL);
12066 }
12067 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012068}
12069
12070/**
Daniel Veillard65686452012-07-19 18:25:01 +080012071 * xmlParseCheckTransition:
12072 * @ctxt: an XML parser context
12073 * @chunk: a char array
12074 * @size: the size in byte of the chunk
12075 *
12076 * Check depending on the current parser state if the chunk given must be
12077 * processed immediately or one need more data to advance on parsing.
12078 *
12079 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12080 */
12081static int
12082xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12083 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12084 return(-1);
12085 if (ctxt->instate == XML_PARSER_START_TAG) {
12086 if (memchr(chunk, '>', size) != NULL)
12087 return(1);
12088 return(0);
12089 }
12090 if (ctxt->progressive == XML_PARSER_COMMENT) {
12091 if (memchr(chunk, '>', size) != NULL)
12092 return(1);
12093 return(0);
12094 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012095 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12096 if (memchr(chunk, '>', size) != NULL)
12097 return(1);
12098 return(0);
12099 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012100 if (ctxt->progressive == XML_PARSER_PI) {
12101 if (memchr(chunk, '>', size) != NULL)
12102 return(1);
12103 return(0);
12104 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012105 if (ctxt->instate == XML_PARSER_END_TAG) {
12106 if (memchr(chunk, '>', size) != NULL)
12107 return(1);
12108 return(0);
12109 }
12110 if ((ctxt->progressive == XML_PARSER_DTD) ||
12111 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012112 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012113 return(1);
12114 return(0);
12115 }
Daniel Veillard65686452012-07-19 18:25:01 +080012116 return(1);
12117}
12118
12119/**
Owen Taylor3473f882001-02-23 17:55:21 +000012120 * xmlParseChunk:
12121 * @ctxt: an XML parser context
12122 * @chunk: an char array
12123 * @size: the size in byte of the chunk
12124 * @terminate: last chunk indicator
12125 *
12126 * Parse a Chunk of memory
12127 *
12128 * Returns zero if no error, the xmlParserErrors otherwise.
12129 */
12130int
12131xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12132 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012133 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012134 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012135 size_t old_avail = 0;
12136 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012137
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012138 if (ctxt == NULL)
12139 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012140 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012141 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012142 if (ctxt->instate == XML_PARSER_EOF)
12143 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012144 if (ctxt->instate == XML_PARSER_START)
12145 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012146 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12147 (chunk[size - 1] == '\r')) {
12148 end_in_lf = 1;
12149 size--;
12150 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012151
12152xmldecl_done:
12153
Owen Taylor3473f882001-02-23 17:55:21 +000012154 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12155 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012156 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12157 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012158 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012159
Daniel Veillard65686452012-07-19 18:25:01 +080012160 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012161 /*
12162 * Specific handling if we autodetected an encoding, we should not
12163 * push more than the first line ... which depend on the encoding
12164 * And only push the rest once the final encoding was detected
12165 */
12166 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12167 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012168 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012169
12170 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12171 BAD_CAST "UTF-16")) ||
12172 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12173 BAD_CAST "UTF16")))
12174 len = 90;
12175 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12176 BAD_CAST "UCS-4")) ||
12177 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12178 BAD_CAST "UCS4")))
12179 len = 180;
12180
12181 if (ctxt->input->buf->rawconsumed < len)
12182 len -= ctxt->input->buf->rawconsumed;
12183
Raul Hudeaba9716a2010-03-15 10:13:29 +010012184 /*
12185 * Change size for reading the initial declaration only
12186 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12187 * will blindly copy extra bytes from memory.
12188 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012189 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012190 remain = size - len;
12191 size = len;
12192 } else {
12193 remain = 0;
12194 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012195 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012196 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012197 if (res < 0) {
12198 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012199 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012200 return (XML_PARSER_EOF);
12201 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012202 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012203#ifdef DEBUG_PUSH
12204 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12205#endif
12206
Owen Taylor3473f882001-02-23 17:55:21 +000012207 } else if (ctxt->instate != XML_PARSER_EOF) {
12208 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12209 xmlParserInputBufferPtr in = ctxt->input->buf;
12210 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12211 (in->raw != NULL)) {
12212 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012213 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12214 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012215
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012216 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012217 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012218 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012219 xmlGenericError(xmlGenericErrorContext,
12220 "xmlParseChunk: encoder error\n");
Nick Wellnhoferab362ab2018-01-22 15:40:05 +010012221 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012222 return(XML_ERR_INVALID_ENCODING);
12223 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012224 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012225 }
12226 }
12227 }
Daniel Veillard65686452012-07-19 18:25:01 +080012228 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012229 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012230 } else {
12231 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12232 avail = xmlBufUse(ctxt->input->buf->buffer);
12233 /*
12234 * Depending on the current state it may not be such
12235 * a good idea to try parsing if there is nothing in the chunk
12236 * which would be worth doing a parser state transition and we
12237 * need to wait for more data
12238 */
12239 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12240 (old_avail == 0) || (avail == 0) ||
12241 (xmlParseCheckTransition(ctxt,
12242 (const char *)&ctxt->input->base[old_avail],
12243 avail - old_avail)))
12244 xmlParseTryOrFinish(ctxt, terminate);
12245 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012246 if (ctxt->instate == XML_PARSER_EOF)
12247 return(ctxt->errNo);
12248
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012249 if ((ctxt->input != NULL) &&
12250 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12251 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12252 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12253 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012254 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012255 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012256 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12257 return(ctxt->errNo);
12258
12259 if (remain != 0) {
12260 chunk += size;
12261 size = remain;
12262 remain = 0;
12263 goto xmldecl_done;
12264 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012265 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12266 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012267 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12268 ctxt->input);
12269 size_t current = ctxt->input->cur - ctxt->input->base;
12270
Daniel Veillarda617e242006-01-09 14:38:44 +000012271 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012272
12273 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12274 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012275 }
Owen Taylor3473f882001-02-23 17:55:21 +000012276 if (terminate) {
12277 /*
12278 * Check for termination
12279 */
Daniel Veillard65686452012-07-19 18:25:01 +080012280 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012281
12282 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012283 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012284 cur_avail = ctxt->input->length -
12285 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012286 else
Daniel Veillard65686452012-07-19 18:25:01 +080012287 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12288 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012289 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012290
Owen Taylor3473f882001-02-23 17:55:21 +000012291 if ((ctxt->instate != XML_PARSER_EOF) &&
12292 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012293 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012294 }
Daniel Veillard65686452012-07-19 18:25:01 +080012295 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012296 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012297 }
Owen Taylor3473f882001-02-23 17:55:21 +000012298 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012299 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012300 ctxt->sax->endDocument(ctxt->userData);
12301 }
12302 ctxt->instate = XML_PARSER_EOF;
12303 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012304 if (ctxt->wellFormed == 0)
12305 return((xmlParserErrors) ctxt->errNo);
12306 else
12307 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012308}
12309
12310/************************************************************************
12311 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012312 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012313 * *
12314 ************************************************************************/
12315
12316/**
Owen Taylor3473f882001-02-23 17:55:21 +000012317 * xmlCreatePushParserCtxt:
12318 * @sax: a SAX handler
12319 * @user_data: The user data returned on SAX callbacks
12320 * @chunk: a pointer to an array of chars
12321 * @size: number of chars in the array
12322 * @filename: an optional file name or URI
12323 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012324 * Create a parser context for using the XML parser in push mode.
12325 * If @buffer and @size are non-NULL, the data is used to detect
12326 * the encoding. The remaining characters will be parsed so they
12327 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012328 * To allow content encoding detection, @size should be >= 4
12329 * The value of @filename is used for fetching external entities
12330 * and error/warning reports.
12331 *
12332 * Returns the new parser context or NULL
12333 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012334
Owen Taylor3473f882001-02-23 17:55:21 +000012335xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012336xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012337 const char *chunk, int size, const char *filename) {
12338 xmlParserCtxtPtr ctxt;
12339 xmlParserInputPtr inputStream;
12340 xmlParserInputBufferPtr buf;
12341 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12342
12343 /*
12344 * plug some encoding conversion routines
12345 */
12346 if ((chunk != NULL) && (size >= 4))
12347 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12348
12349 buf = xmlAllocParserInputBuffer(enc);
12350 if (buf == NULL) return(NULL);
12351
12352 ctxt = xmlNewParserCtxt();
12353 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012354 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012355 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012356 return(NULL);
12357 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012358 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012359 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12360 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012361 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012362 xmlFreeParserInputBuffer(buf);
12363 xmlFreeParserCtxt(ctxt);
12364 return(NULL);
12365 }
Owen Taylor3473f882001-02-23 17:55:21 +000012366 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012367#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012368 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012369#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012370 xmlFree(ctxt->sax);
12371 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12372 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012373 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012374 xmlFreeParserInputBuffer(buf);
12375 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012376 return(NULL);
12377 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012378 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12379 if (sax->initialized == XML_SAX2_MAGIC)
12380 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12381 else
12382 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012383 if (user_data != NULL)
12384 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012385 }
Owen Taylor3473f882001-02-23 17:55:21 +000012386 if (filename == NULL) {
12387 ctxt->directory = NULL;
12388 } else {
12389 ctxt->directory = xmlParserGetDirectory(filename);
12390 }
12391
12392 inputStream = xmlNewInputStream(ctxt);
12393 if (inputStream == NULL) {
12394 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012395 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012396 return(NULL);
12397 }
12398
12399 if (filename == NULL)
12400 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012401 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012402 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012403 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012404 if (inputStream->filename == NULL) {
12405 xmlFreeParserCtxt(ctxt);
12406 xmlFreeParserInputBuffer(buf);
12407 return(NULL);
12408 }
12409 }
Owen Taylor3473f882001-02-23 17:55:21 +000012410 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012411 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012412 inputPush(ctxt, inputStream);
12413
William M. Brack3a1cd212005-02-11 14:35:54 +000012414 /*
12415 * If the caller didn't provide an initial 'chunk' for determining
12416 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12417 * that it can be automatically determined later
12418 */
12419 if ((size == 0) || (chunk == NULL)) {
12420 ctxt->charset = XML_CHAR_ENCODING_NONE;
12421 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012422 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12423 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012424
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012425 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012426
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012427 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012428#ifdef DEBUG_PUSH
12429 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12430#endif
12431 }
12432
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012433 if (enc != XML_CHAR_ENCODING_NONE) {
12434 xmlSwitchEncoding(ctxt, enc);
12435 }
12436
Owen Taylor3473f882001-02-23 17:55:21 +000012437 return(ctxt);
12438}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012439#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012440
12441/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012442 * xmlHaltParser:
12443 * @ctxt: an XML parser context
12444 *
12445 * Blocks further parser processing don't override error
12446 * for internal use
12447 */
12448static void
12449xmlHaltParser(xmlParserCtxtPtr ctxt) {
12450 if (ctxt == NULL)
12451 return;
12452 ctxt->instate = XML_PARSER_EOF;
12453 ctxt->disableSAX = 1;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012454 while (ctxt->inputNr > 1)
12455 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012456 if (ctxt->input != NULL) {
12457 /*
12458 * in case there was a specific allocation deallocate before
12459 * overriding base
12460 */
12461 if (ctxt->input->free != NULL) {
12462 ctxt->input->free((xmlChar *) ctxt->input->base);
12463 ctxt->input->free = NULL;
12464 }
Elliott Hughes7fbecab2019-01-10 16:42:03 -080012465 if (ctxt->input->buf != NULL) {
12466 xmlFreeParserInputBuffer(ctxt->input->buf);
12467 ctxt->input->buf = NULL;
12468 }
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012469 ctxt->input->cur = BAD_CAST"";
Elliott Hughes7fbecab2019-01-10 16:42:03 -080012470 ctxt->input->length = 0;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012471 ctxt->input->base = ctxt->input->cur;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012472 ctxt->input->end = ctxt->input->cur;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012473 }
12474}
12475
12476/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012477 * xmlStopParser:
12478 * @ctxt: an XML parser context
12479 *
12480 * Blocks further parser processing
12481 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012482void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012483xmlStopParser(xmlParserCtxtPtr ctxt) {
12484 if (ctxt == NULL)
12485 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012486 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012487 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012488}
12489
12490/**
Owen Taylor3473f882001-02-23 17:55:21 +000012491 * xmlCreateIOParserCtxt:
12492 * @sax: a SAX handler
12493 * @user_data: The user data returned on SAX callbacks
12494 * @ioread: an I/O read function
12495 * @ioclose: an I/O close function
12496 * @ioctx: an I/O handler
12497 * @enc: the charset encoding if known
12498 *
12499 * Create a parser context for using the XML parser with an existing
12500 * I/O stream
12501 *
12502 * Returns the new parser context or NULL
12503 */
12504xmlParserCtxtPtr
12505xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12506 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12507 void *ioctx, xmlCharEncoding enc) {
12508 xmlParserCtxtPtr ctxt;
12509 xmlParserInputPtr inputStream;
12510 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012511
Daniel Veillard42595322004-11-08 10:52:06 +000012512 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012513
12514 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012515 if (buf == NULL) {
12516 if (ioclose != NULL)
12517 ioclose(ioctx);
12518 return (NULL);
12519 }
Owen Taylor3473f882001-02-23 17:55:21 +000012520
12521 ctxt = xmlNewParserCtxt();
12522 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012523 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012524 return(NULL);
12525 }
12526 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012527#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012528 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012529#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012530 xmlFree(ctxt->sax);
12531 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12532 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012533 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012534 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012535 return(NULL);
12536 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012537 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12538 if (sax->initialized == XML_SAX2_MAGIC)
12539 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12540 else
12541 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012542 if (user_data != NULL)
12543 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012544 }
Owen Taylor3473f882001-02-23 17:55:21 +000012545
12546 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12547 if (inputStream == NULL) {
12548 xmlFreeParserCtxt(ctxt);
12549 return(NULL);
12550 }
12551 inputPush(ctxt, inputStream);
12552
12553 return(ctxt);
12554}
12555
Daniel Veillard4432df22003-09-28 18:58:27 +000012556#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012557/************************************************************************
12558 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012559 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012560 * *
12561 ************************************************************************/
12562
12563/**
12564 * xmlIOParseDTD:
12565 * @sax: the SAX handler block or NULL
12566 * @input: an Input Buffer
12567 * @enc: the charset encoding if known
12568 *
12569 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012570 *
Owen Taylor3473f882001-02-23 17:55:21 +000012571 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012572 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012573 */
12574
12575xmlDtdPtr
12576xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12577 xmlCharEncoding enc) {
12578 xmlDtdPtr ret = NULL;
12579 xmlParserCtxtPtr ctxt;
12580 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012581 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012582
12583 if (input == NULL)
12584 return(NULL);
12585
12586 ctxt = xmlNewParserCtxt();
12587 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012588 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012589 return(NULL);
12590 }
12591
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012592 /* We are loading a DTD */
12593 ctxt->options |= XML_PARSE_DTDLOAD;
12594
Owen Taylor3473f882001-02-23 17:55:21 +000012595 /*
12596 * Set-up the SAX context
12597 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012598 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012599 if (ctxt->sax != NULL)
12600 xmlFree(ctxt->sax);
12601 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012602 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012603 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012604 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012605
12606 /*
12607 * generate a parser input from the I/O handler
12608 */
12609
Daniel Veillard43caefb2003-12-07 19:32:22 +000012610 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012611 if (pinput == NULL) {
12612 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012613 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012614 xmlFreeParserCtxt(ctxt);
12615 return(NULL);
12616 }
12617
12618 /*
12619 * plug some encoding conversion routines here.
12620 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012621 if (xmlPushInput(ctxt, pinput) < 0) {
12622 if (sax != NULL) ctxt->sax = NULL;
12623 xmlFreeParserCtxt(ctxt);
12624 return(NULL);
12625 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012626 if (enc != XML_CHAR_ENCODING_NONE) {
12627 xmlSwitchEncoding(ctxt, enc);
12628 }
Owen Taylor3473f882001-02-23 17:55:21 +000012629
12630 pinput->filename = NULL;
12631 pinput->line = 1;
12632 pinput->col = 1;
12633 pinput->base = ctxt->input->cur;
12634 pinput->cur = ctxt->input->cur;
12635 pinput->free = NULL;
12636
12637 /*
12638 * let's parse that entity knowing it's an external subset.
12639 */
12640 ctxt->inSubset = 2;
12641 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012642 if (ctxt->myDoc == NULL) {
12643 xmlErrMemory(ctxt, "New Doc failed");
12644 return(NULL);
12645 }
12646 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012647 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12648 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012649
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012650 if ((enc == XML_CHAR_ENCODING_NONE) &&
12651 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012652 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012653 * Get the 4 first bytes and decode the charset
12654 * if enc != XML_CHAR_ENCODING_NONE
12655 * plug some encoding conversion routines.
12656 */
12657 start[0] = RAW;
12658 start[1] = NXT(1);
12659 start[2] = NXT(2);
12660 start[3] = NXT(3);
12661 enc = xmlDetectCharEncoding(start, 4);
12662 if (enc != XML_CHAR_ENCODING_NONE) {
12663 xmlSwitchEncoding(ctxt, enc);
12664 }
12665 }
12666
Owen Taylor3473f882001-02-23 17:55:21 +000012667 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12668
12669 if (ctxt->myDoc != NULL) {
12670 if (ctxt->wellFormed) {
12671 ret = ctxt->myDoc->extSubset;
12672 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012673 if (ret != NULL) {
12674 xmlNodePtr tmp;
12675
12676 ret->doc = NULL;
12677 tmp = ret->children;
12678 while (tmp != NULL) {
12679 tmp->doc = NULL;
12680 tmp = tmp->next;
12681 }
12682 }
Owen Taylor3473f882001-02-23 17:55:21 +000012683 } else {
12684 ret = NULL;
12685 }
12686 xmlFreeDoc(ctxt->myDoc);
12687 ctxt->myDoc = NULL;
12688 }
12689 if (sax != NULL) ctxt->sax = NULL;
12690 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012691
Owen Taylor3473f882001-02-23 17:55:21 +000012692 return(ret);
12693}
12694
12695/**
12696 * xmlSAXParseDTD:
12697 * @sax: the SAX handler block
12698 * @ExternalID: a NAME* containing the External ID of the DTD
12699 * @SystemID: a NAME* containing the URL to the DTD
12700 *
12701 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012702 *
Owen Taylor3473f882001-02-23 17:55:21 +000012703 * Returns the resulting xmlDtdPtr or NULL in case of error.
12704 */
12705
12706xmlDtdPtr
12707xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12708 const xmlChar *SystemID) {
12709 xmlDtdPtr ret = NULL;
12710 xmlParserCtxtPtr ctxt;
12711 xmlParserInputPtr input = NULL;
12712 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012713 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012714
12715 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12716
12717 ctxt = xmlNewParserCtxt();
12718 if (ctxt == NULL) {
12719 return(NULL);
12720 }
12721
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012722 /* We are loading a DTD */
12723 ctxt->options |= XML_PARSE_DTDLOAD;
12724
Owen Taylor3473f882001-02-23 17:55:21 +000012725 /*
12726 * Set-up the SAX context
12727 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012728 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012729 if (ctxt->sax != NULL)
12730 xmlFree(ctxt->sax);
12731 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012732 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012733 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012734
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012735 /*
12736 * Canonicalise the system ID
12737 */
12738 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012739 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012740 xmlFreeParserCtxt(ctxt);
12741 return(NULL);
12742 }
Owen Taylor3473f882001-02-23 17:55:21 +000012743
12744 /*
12745 * Ask the Entity resolver to load the damn thing
12746 */
12747
12748 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012749 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12750 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012751 if (input == NULL) {
12752 if (sax != NULL) ctxt->sax = NULL;
12753 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012754 if (systemIdCanonic != NULL)
12755 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012756 return(NULL);
12757 }
12758
12759 /*
12760 * plug some encoding conversion routines here.
12761 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012762 if (xmlPushInput(ctxt, input) < 0) {
12763 if (sax != NULL) ctxt->sax = NULL;
12764 xmlFreeParserCtxt(ctxt);
12765 if (systemIdCanonic != NULL)
12766 xmlFree(systemIdCanonic);
12767 return(NULL);
12768 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012769 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12770 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12771 xmlSwitchEncoding(ctxt, enc);
12772 }
Owen Taylor3473f882001-02-23 17:55:21 +000012773
12774 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012775 input->filename = (char *) systemIdCanonic;
12776 else
12777 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012778 input->line = 1;
12779 input->col = 1;
12780 input->base = ctxt->input->cur;
12781 input->cur = ctxt->input->cur;
12782 input->free = NULL;
12783
12784 /*
12785 * let's parse that entity knowing it's an external subset.
12786 */
12787 ctxt->inSubset = 2;
12788 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012789 if (ctxt->myDoc == NULL) {
12790 xmlErrMemory(ctxt, "New Doc failed");
12791 if (sax != NULL) ctxt->sax = NULL;
12792 xmlFreeParserCtxt(ctxt);
12793 return(NULL);
12794 }
12795 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012796 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12797 ExternalID, SystemID);
12798 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12799
12800 if (ctxt->myDoc != NULL) {
12801 if (ctxt->wellFormed) {
12802 ret = ctxt->myDoc->extSubset;
12803 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012804 if (ret != NULL) {
12805 xmlNodePtr tmp;
12806
12807 ret->doc = NULL;
12808 tmp = ret->children;
12809 while (tmp != NULL) {
12810 tmp->doc = NULL;
12811 tmp = tmp->next;
12812 }
12813 }
Owen Taylor3473f882001-02-23 17:55:21 +000012814 } else {
12815 ret = NULL;
12816 }
12817 xmlFreeDoc(ctxt->myDoc);
12818 ctxt->myDoc = NULL;
12819 }
12820 if (sax != NULL) ctxt->sax = NULL;
12821 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012822
Owen Taylor3473f882001-02-23 17:55:21 +000012823 return(ret);
12824}
12825
Daniel Veillard4432df22003-09-28 18:58:27 +000012826
Owen Taylor3473f882001-02-23 17:55:21 +000012827/**
12828 * xmlParseDTD:
12829 * @ExternalID: a NAME* containing the External ID of the DTD
12830 * @SystemID: a NAME* containing the URL to the DTD
12831 *
12832 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012833 *
Owen Taylor3473f882001-02-23 17:55:21 +000012834 * Returns the resulting xmlDtdPtr or NULL in case of error.
12835 */
12836
12837xmlDtdPtr
12838xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12839 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12840}
Daniel Veillard4432df22003-09-28 18:58:27 +000012841#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012842
12843/************************************************************************
12844 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012845 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012846 * *
12847 ************************************************************************/
12848
12849/**
Owen Taylor3473f882001-02-23 17:55:21 +000012850 * xmlParseCtxtExternalEntity:
12851 * @ctx: the existing parsing context
12852 * @URL: the URL for the entity to load
12853 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012854 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012855 *
12856 * Parse an external general entity within an existing parsing context
12857 * An external general parsed entity is well-formed if it matches the
12858 * production labeled extParsedEnt.
12859 *
12860 * [78] extParsedEnt ::= TextDecl? content
12861 *
12862 * Returns 0 if the entity is well formed, -1 in case of args problem and
12863 * the parser error code otherwise
12864 */
12865
12866int
12867xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012868 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012869 xmlParserCtxtPtr ctxt;
12870 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012871 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012872 xmlSAXHandlerPtr oldsax = NULL;
12873 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012874 xmlChar start[4];
12875 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012876
Daniel Veillardce682bc2004-11-05 17:22:25 +000012877 if (ctx == NULL) return(-1);
12878
Daniel Veillard0161e632008-08-28 15:36:32 +000012879 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12880 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012881 return(XML_ERR_ENTITY_LOOP);
12882 }
12883
Daniel Veillardcda96922001-08-21 10:56:31 +000012884 if (lst != NULL)
12885 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012886 if ((URL == NULL) && (ID == NULL))
12887 return(-1);
12888 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12889 return(-1);
12890
Rob Richards798743a2009-06-19 13:54:25 -040012891 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012892 if (ctxt == NULL) {
12893 return(-1);
12894 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012895
Owen Taylor3473f882001-02-23 17:55:21 +000012896 oldsax = ctxt->sax;
12897 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012898 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012899 newDoc = xmlNewDoc(BAD_CAST "1.0");
12900 if (newDoc == NULL) {
12901 xmlFreeParserCtxt(ctxt);
12902 return(-1);
12903 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012904 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012905 if (ctx->myDoc->dict) {
12906 newDoc->dict = ctx->myDoc->dict;
12907 xmlDictReference(newDoc->dict);
12908 }
Owen Taylor3473f882001-02-23 17:55:21 +000012909 if (ctx->myDoc != NULL) {
12910 newDoc->intSubset = ctx->myDoc->intSubset;
12911 newDoc->extSubset = ctx->myDoc->extSubset;
12912 }
12913 if (ctx->myDoc->URL != NULL) {
12914 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12915 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012916 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12917 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012918 ctxt->sax = oldsax;
12919 xmlFreeParserCtxt(ctxt);
12920 newDoc->intSubset = NULL;
12921 newDoc->extSubset = NULL;
12922 xmlFreeDoc(newDoc);
12923 return(-1);
12924 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012925 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012926 nodePush(ctxt, newDoc->children);
12927 if (ctx->myDoc == NULL) {
12928 ctxt->myDoc = newDoc;
12929 } else {
12930 ctxt->myDoc = ctx->myDoc;
12931 newDoc->children->doc = ctx->myDoc;
12932 }
12933
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012934 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012935 * Get the 4 first bytes and decode the charset
12936 * if enc != XML_CHAR_ENCODING_NONE
12937 * plug some encoding conversion routines.
12938 */
12939 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012940 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12941 start[0] = RAW;
12942 start[1] = NXT(1);
12943 start[2] = NXT(2);
12944 start[3] = NXT(3);
12945 enc = xmlDetectCharEncoding(start, 4);
12946 if (enc != XML_CHAR_ENCODING_NONE) {
12947 xmlSwitchEncoding(ctxt, enc);
12948 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012949 }
12950
Owen Taylor3473f882001-02-23 17:55:21 +000012951 /*
12952 * Parse a possible text declaration first
12953 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012954 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012955 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012956 /*
12957 * An XML-1.0 document can't reference an entity not XML-1.0
12958 */
12959 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12960 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012961 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012962 "Version mismatch between document and entity\n");
12963 }
Owen Taylor3473f882001-02-23 17:55:21 +000012964 }
12965
12966 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012967 * If the user provided its own SAX callbacks then reuse the
12968 * useData callback field, otherwise the expected setup in a
12969 * DOM builder is to have userData == ctxt
12970 */
12971 if (ctx->userData == ctx)
12972 ctxt->userData = ctxt;
12973 else
12974 ctxt->userData = ctx->userData;
12975
12976 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012977 * Doing validity checking on chunk doesn't make sense
12978 */
12979 ctxt->instate = XML_PARSER_CONTENT;
12980 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012981 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012982 ctxt->loadsubset = ctx->loadsubset;
12983 ctxt->depth = ctx->depth + 1;
12984 ctxt->replaceEntities = ctx->replaceEntities;
12985 if (ctxt->validate) {
12986 ctxt->vctxt.error = ctx->vctxt.error;
12987 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012988 } else {
12989 ctxt->vctxt.error = NULL;
12990 ctxt->vctxt.warning = NULL;
12991 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012992 ctxt->vctxt.nodeTab = NULL;
12993 ctxt->vctxt.nodeNr = 0;
12994 ctxt->vctxt.nodeMax = 0;
12995 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012996 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12997 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012998 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12999 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13000 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013001 ctxt->dictNames = ctx->dictNames;
13002 ctxt->attsDefault = ctx->attsDefault;
13003 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000013004 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013005
13006 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013007
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013008 ctx->validate = ctxt->validate;
13009 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013010 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013011 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013012 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013013 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013014 }
13015 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013016 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013017 }
13018
13019 if (!ctxt->wellFormed) {
13020 if (ctxt->errNo == 0)
13021 ret = 1;
13022 else
13023 ret = ctxt->errNo;
13024 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013025 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013026 xmlNodePtr cur;
13027
13028 /*
13029 * Return the newly created nodeset after unlinking it from
13030 * they pseudo parent.
13031 */
13032 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013033 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013034 while (cur != NULL) {
13035 cur->parent = NULL;
13036 cur = cur->next;
13037 }
13038 newDoc->children->children = NULL;
13039 }
13040 ret = 0;
13041 }
13042 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013043 ctxt->dict = NULL;
13044 ctxt->attsDefault = NULL;
13045 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013046 xmlFreeParserCtxt(ctxt);
13047 newDoc->intSubset = NULL;
13048 newDoc->extSubset = NULL;
13049 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013050
Owen Taylor3473f882001-02-23 17:55:21 +000013051 return(ret);
13052}
13053
13054/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013055 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013056 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013057 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013058 * @sax: the SAX handler bloc (possibly NULL)
13059 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13060 * @depth: Used for loop detection, use 0
13061 * @URL: the URL for the entity to load
13062 * @ID: the System ID for the entity to load
13063 * @list: the return value for the set of parsed nodes
13064 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013065 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013066 *
13067 * Returns 0 if the entity is well formed, -1 in case of args problem and
13068 * the parser error code otherwise
13069 */
13070
Daniel Veillard7d515752003-09-26 19:12:37 +000013071static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013072xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13073 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013074 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013075 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013076 xmlParserCtxtPtr ctxt;
13077 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013078 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013079 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013080 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013081 xmlChar start[4];
13082 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013083
Daniel Veillard0161e632008-08-28 15:36:32 +000013084 if (((depth > 40) &&
13085 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13086 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013087 return(XML_ERR_ENTITY_LOOP);
13088 }
13089
Owen Taylor3473f882001-02-23 17:55:21 +000013090 if (list != NULL)
13091 *list = NULL;
13092 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013093 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013094 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013095 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013096
13097
Rob Richards9c0aa472009-03-26 18:10:19 +000013098 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013099 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013100 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013101 if (oldctxt != NULL) {
13102 ctxt->_private = oldctxt->_private;
13103 ctxt->loadsubset = oldctxt->loadsubset;
13104 ctxt->validate = oldctxt->validate;
13105 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013106 ctxt->record_info = oldctxt->record_info;
13107 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13108 ctxt->node_seq.length = oldctxt->node_seq.length;
13109 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013110 } else {
13111 /*
13112 * Doing validity checking on chunk without context
13113 * doesn't make sense
13114 */
13115 ctxt->_private = NULL;
13116 ctxt->validate = 0;
13117 ctxt->external = 2;
13118 ctxt->loadsubset = 0;
13119 }
Owen Taylor3473f882001-02-23 17:55:21 +000013120 if (sax != NULL) {
13121 oldsax = ctxt->sax;
13122 ctxt->sax = sax;
13123 if (user_data != NULL)
13124 ctxt->userData = user_data;
13125 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013126 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013127 newDoc = xmlNewDoc(BAD_CAST "1.0");
13128 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013129 ctxt->node_seq.maximum = 0;
13130 ctxt->node_seq.length = 0;
13131 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013132 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013133 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013134 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013135 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013136 newDoc->intSubset = doc->intSubset;
13137 newDoc->extSubset = doc->extSubset;
13138 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013139 xmlDictReference(newDoc->dict);
13140
Owen Taylor3473f882001-02-23 17:55:21 +000013141 if (doc->URL != NULL) {
13142 newDoc->URL = xmlStrdup(doc->URL);
13143 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013144 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13145 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013146 if (sax != NULL)
13147 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013148 ctxt->node_seq.maximum = 0;
13149 ctxt->node_seq.length = 0;
13150 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013151 xmlFreeParserCtxt(ctxt);
13152 newDoc->intSubset = NULL;
13153 newDoc->extSubset = NULL;
13154 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013155 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013156 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013157 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013158 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013159 ctxt->myDoc = doc;
13160 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013161
Daniel Veillard0161e632008-08-28 15:36:32 +000013162 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013163 * Get the 4 first bytes and decode the charset
13164 * if enc != XML_CHAR_ENCODING_NONE
13165 * plug some encoding conversion routines.
13166 */
13167 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013168 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13169 start[0] = RAW;
13170 start[1] = NXT(1);
13171 start[2] = NXT(2);
13172 start[3] = NXT(3);
13173 enc = xmlDetectCharEncoding(start, 4);
13174 if (enc != XML_CHAR_ENCODING_NONE) {
13175 xmlSwitchEncoding(ctxt, enc);
13176 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013177 }
13178
Owen Taylor3473f882001-02-23 17:55:21 +000013179 /*
13180 * Parse a possible text declaration first
13181 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013182 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013183 xmlParseTextDecl(ctxt);
13184 }
13185
Owen Taylor3473f882001-02-23 17:55:21 +000013186 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013187 ctxt->depth = depth;
13188
13189 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013190
Daniel Veillard561b7f82002-03-20 21:55:57 +000013191 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013192 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013193 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013194 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013195 }
13196 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013197 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013198 }
13199
13200 if (!ctxt->wellFormed) {
13201 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013202 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013203 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013204 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013205 } else {
13206 if (list != NULL) {
13207 xmlNodePtr cur;
13208
13209 /*
13210 * Return the newly created nodeset after unlinking it from
13211 * they pseudo parent.
13212 */
13213 cur = newDoc->children->children;
13214 *list = cur;
13215 while (cur != NULL) {
13216 cur->parent = NULL;
13217 cur = cur->next;
13218 }
13219 newDoc->children->children = NULL;
13220 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013221 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013222 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013223
13224 /*
13225 * Record in the parent context the number of entities replacement
13226 * done when parsing that reference.
13227 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013228 if (oldctxt != NULL)
13229 oldctxt->nbentities += ctxt->nbentities;
13230
Daniel Veillard0161e632008-08-28 15:36:32 +000013231 /*
13232 * Also record the size of the entity parsed
13233 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013234 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013235 oldctxt->sizeentities += ctxt->input->consumed;
13236 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13237 }
13238 /*
13239 * And record the last error if any
13240 */
Nick Wellnhofer3eef3f32017-06-20 16:13:57 +020013241 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
Daniel Veillard0161e632008-08-28 15:36:32 +000013242 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13243
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013244 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013245 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013246 if (oldctxt != NULL) {
13247 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13248 oldctxt->node_seq.length = ctxt->node_seq.length;
13249 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13250 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013251 ctxt->node_seq.maximum = 0;
13252 ctxt->node_seq.length = 0;
13253 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013254 xmlFreeParserCtxt(ctxt);
13255 newDoc->intSubset = NULL;
13256 newDoc->extSubset = NULL;
13257 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013258
Owen Taylor3473f882001-02-23 17:55:21 +000013259 return(ret);
13260}
13261
Daniel Veillard81273902003-09-30 00:43:48 +000013262#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013263/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013264 * xmlParseExternalEntity:
13265 * @doc: the document the chunk pertains to
13266 * @sax: the SAX handler bloc (possibly NULL)
13267 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13268 * @depth: Used for loop detection, use 0
13269 * @URL: the URL for the entity to load
13270 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013271 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013272 *
13273 * Parse an external general entity
13274 * An external general parsed entity is well-formed if it matches the
13275 * production labeled extParsedEnt.
13276 *
13277 * [78] extParsedEnt ::= TextDecl? content
13278 *
13279 * Returns 0 if the entity is well formed, -1 in case of args problem and
13280 * the parser error code otherwise
13281 */
13282
13283int
13284xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013285 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013286 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013287 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013288}
13289
13290/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013291 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013292 * @doc: the document the chunk pertains to
13293 * @sax: the SAX handler bloc (possibly NULL)
13294 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13295 * @depth: Used for loop detection, use 0
13296 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013297 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013298 *
13299 * Parse a well-balanced chunk of an XML document
13300 * called by the parser
13301 * The allowed sequence for the Well Balanced Chunk is the one defined by
13302 * the content production in the XML grammar:
13303 *
13304 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13305 *
13306 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13307 * the parser error code otherwise
13308 */
13309
13310int
13311xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013312 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013313 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13314 depth, string, lst, 0 );
13315}
Daniel Veillard81273902003-09-30 00:43:48 +000013316#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013317
/**
 * xmlParseBalancedChunkMemoryInternal:
 * @oldctxt:  the existing parsing context
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
 * @user_data:  the user data field for the parser context
 * @lst:  the return value for the set of parsed nodes
 *
 * Parse a well-balanced chunk of an XML document on behalf of an
 * already running parser.  A temporary parser context is created over
 * @string; it borrows the dictionary, SAX handler, options, namespace
 * stack and attribute default tables of @oldctxt, and parses the chunk
 * under a temporary "pseudoroot" element.
 *
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
 *    error code otherwise.  Specifically:
 *      - XML_ERR_ENTITY_LOOP if the nesting depth of @oldctxt is too large,
 *      - XML_ERR_INTERNAL_ERROR if @string is NULL, if a document or the
 *        pseudoroot node cannot be allocated, or if the chunk is not well
 *        formed but no error number was recorded,
 *      - XML_WAR_UNDECLARED_ENTITY if the temporary parser context cannot
 *        be created.
 *
 * The node list is stored in @lst only when the result is XML_ERR_OK;
 * in every other case *@lst is left NULL (this function has no recovery
 * mode).
 */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
	const xmlChar *string, void *user_data, xmlNodePtr *lst) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc = NULL;
    xmlNodePtr newRoot;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content = NULL;
    xmlNodePtr last = NULL;
    int size;
    xmlParserErrors ret = XML_ERR_OK;
#ifdef SAX2
    int i;
#endif

    /*
     * Loop protection: the depth limit is 40 unless XML_PARSE_HUGE is
     * set, and 1024 in any case.
     */
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
        (oldctxt->depth > 1024)) {
	return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(XML_ERR_INTERNAL_ERROR);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
    if (user_data != NULL)
	ctxt->userData = user_data;
    else
	ctxt->userData = ctxt;
    /*
     * Replace the context's own dictionary by the parent's one.  No
     * reference is taken here, which is why every exit path resets
     * ctxt->dict to NULL before calling xmlFreeParserCtxt().
     */
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
    ctxt->dict = oldctxt->dict;
    ctxt->input_id = oldctxt->input_id + 1;
    /* re-intern the well known strings in the dictionary now in use */
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

#ifdef SAX2
    /* propagate namespaces down the entity */
    for (i = 0;i < oldctxt->nsNr;i += 2) {
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
    }
#endif

    /*
     * Borrow the parent's SAX handler; the context's own handler is kept
     * in oldsax and put back before the context is freed.
     */
    oldsax = ctxt->sax;
    ctxt->sax = oldctxt->sax;
    xmlDetectSAX2(ctxt);
    ctxt->replaceEntities = oldctxt->replaceEntities;
    ctxt->options = oldctxt->options;

    ctxt->_private = oldctxt->_private;
    if (oldctxt->myDoc == NULL) {
	/* the parent has no document yet: build into a temporary one */
	newDoc = xmlNewDoc(BAD_CAST "1.0");
	if (newDoc == NULL) {
	    ctxt->sax = oldsax;
	    ctxt->dict = NULL;
	    xmlFreeParserCtxt(ctxt);
	    return(XML_ERR_INTERNAL_ERROR);
	}
	newDoc->properties = XML_DOC_INTERNAL;
	newDoc->dict = ctxt->dict;
	xmlDictReference(newDoc->dict);
	ctxt->myDoc = newDoc;
    } else {
	/*
	 * Build inside the parent's document; remember its current
	 * children so they can be put back once parsing is done.
	 */
	ctxt->myDoc = oldctxt->myDoc;
        content = ctxt->myDoc->children;
	last = ctxt->myDoc->last;
    }
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
	ctxt->sax = oldsax;
	ctxt->dict = NULL;
	xmlFreeParserCtxt(ctxt);
	if (newDoc != NULL) {
	    xmlFreeDoc(newDoc);
	}
	return(XML_ERR_INTERNAL_ERROR);
    }
    /* make the pseudoroot the only child of the document while parsing */
    ctxt->myDoc->children = NULL;
    ctxt->myDoc->last = NULL;
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
    nodePush(ctxt, ctxt->myDoc->children);
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->depth = oldctxt->depth + 1;

    /* no validation while parsing; see xmlValidateElement below */
    ctxt->validate = 0;
    ctxt->loadsubset = oldctxt->loadsubset;
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
	/*
	 * ID/IDREF registration will be done in xmlValidateElement below
	 */
	ctxt->loadsubset |= XML_SKIP_IDS;
    }
    /* borrowed from the parent; detached again before the context is freed */
    ctxt->dictNames = oldctxt->dictNames;
    ctxt->attsDefault = oldctxt->attsDefault;
    ctxt->attsSpecial = oldctxt->attsSpecial;

    xmlParseContent(ctxt);
    if ((RAW == '<') && (NXT(1) == '/')) {
	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
	xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    /* the node stack must be back at the pseudoroot */
    if (ctxt->node != ctxt->myDoc->children) {
	xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
	    ret = XML_ERR_INTERNAL_ERROR;
	else
	    ret = (xmlParserErrors)ctxt->errNo;
    } else {
      ret = XML_ERR_OK;
    }

    if ((lst != NULL) && (ret == XML_ERR_OK)) {
	xmlNodePtr cur;

	/*
	 * Return the newly created nodeset after unlinking it from
	 * the pseudo parent.
	 */
	cur = ctxt->myDoc->children->children;
	*lst = cur;
	while (cur != NULL) {
#ifdef LIBXML_VALID_ENABLED
	    /* validate top level elements against the parent's document */
	    if ((oldctxt->validate) && (oldctxt->wellFormed) &&
		(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
		(cur->type == XML_ELEMENT_NODE)) {
		oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
			oldctxt->myDoc, cur);
	    }
#endif /* LIBXML_VALID_ENABLED */
	    cur->parent = NULL;
	    cur = cur->next;
	}
        ctxt->myDoc->children->children = NULL;
    }
    if (ctxt->myDoc != NULL) {
	/*
	 * Drop the pseudoroot (with anything still attached to it) and
	 * put back the children saved earlier (both NULL for a temporary
	 * document).
	 */
	xmlFreeNode(ctxt->myDoc->children);
        ctxt->myDoc->children = content;
        ctxt->myDoc->last = last;
    }

    /*
     * Record in the parent context the number of entities replacement
     * done when parsing that reference.
     * (oldctxt has already been dereferenced above, so this test is
     * always true at this point.)
     */
    if (oldctxt != NULL)
        oldctxt->nbentities += ctxt->nbentities;

    /*
     * Also record the last error if any
     */
    if (ctxt->lastError.code != XML_ERR_OK)
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);

    /* detach everything borrowed from the parent before freeing */
    ctxt->sax = oldsax;
    ctxt->dict = NULL;
    ctxt->attsDefault = NULL;
    ctxt->attsSpecial = NULL;
    xmlFreeParserCtxt(ctxt);
    if (newDoc != NULL) {
	xmlFreeDoc(newDoc);
    }

    return(ret);
}
13512
Daniel Veillard29b17482004-08-16 00:39:03 +000013513/**
13514 * xmlParseInNodeContext:
13515 * @node: the context node
13516 * @data: the input string
13517 * @datalen: the input string length in bytes
13518 * @options: a combination of xmlParserOption
13519 * @lst: the return value for the set of parsed nodes
13520 *
13521 * Parse a well-balanced chunk of an XML document
13522 * within the context (DTD, namespaces, etc ...) of the given node.
13523 *
13524 * The allowed sequence for the data is a Well Balanced Chunk defined by
13525 * the content production in the XML grammar:
13526 *
13527 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13528 *
13529 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13530 * error code otherwise
13531 */
13532xmlParserErrors
13533xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13534 int options, xmlNodePtr *lst) {
13535#ifdef SAX2
13536 xmlParserCtxtPtr ctxt;
13537 xmlDocPtr doc = NULL;
13538 xmlNodePtr fake, cur;
13539 int nsnr = 0;
13540
13541 xmlParserErrors ret = XML_ERR_OK;
13542
13543 /*
13544 * check all input parameters, grab the document
13545 */
13546 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13547 return(XML_ERR_INTERNAL_ERROR);
13548 switch (node->type) {
13549 case XML_ELEMENT_NODE:
13550 case XML_ATTRIBUTE_NODE:
13551 case XML_TEXT_NODE:
13552 case XML_CDATA_SECTION_NODE:
13553 case XML_ENTITY_REF_NODE:
13554 case XML_PI_NODE:
13555 case XML_COMMENT_NODE:
13556 case XML_DOCUMENT_NODE:
13557 case XML_HTML_DOCUMENT_NODE:
13558 break;
13559 default:
13560 return(XML_ERR_INTERNAL_ERROR);
13561
13562 }
13563 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13564 (node->type != XML_DOCUMENT_NODE) &&
13565 (node->type != XML_HTML_DOCUMENT_NODE))
13566 node = node->parent;
13567 if (node == NULL)
13568 return(XML_ERR_INTERNAL_ERROR);
13569 if (node->type == XML_ELEMENT_NODE)
13570 doc = node->doc;
13571 else
13572 doc = (xmlDocPtr) node;
13573 if (doc == NULL)
13574 return(XML_ERR_INTERNAL_ERROR);
13575
13576 /*
13577 * allocate a context and set-up everything not related to the
13578 * node position in the tree
13579 */
13580 if (doc->type == XML_DOCUMENT_NODE)
13581 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13582#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013583 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013584 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013585 /*
13586 * When parsing in context, it makes no sense to add implied
13587 * elements like html/body/etc...
13588 */
13589 options |= HTML_PARSE_NOIMPLIED;
13590 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013591#endif
13592 else
13593 return(XML_ERR_INTERNAL_ERROR);
13594
13595 if (ctxt == NULL)
13596 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013597
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013598 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013599 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13600 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13601 * we must wait until the last moment to free the original one.
13602 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013603 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013604 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013605 xmlDictFree(ctxt->dict);
13606 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013607 } else
13608 options |= XML_PARSE_NODICT;
13609
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013610 if (doc->encoding != NULL) {
13611 xmlCharEncodingHandlerPtr hdlr;
13612
13613 if (ctxt->encoding != NULL)
13614 xmlFree((xmlChar *) ctxt->encoding);
13615 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13616
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013617 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013618 if (hdlr != NULL) {
13619 xmlSwitchToEncoding(ctxt, hdlr);
13620 } else {
13621 return(XML_ERR_UNSUPPORTED_ENCODING);
13622 }
13623 }
13624
Daniel Veillard37334572008-07-31 08:20:02 +000013625 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013626 xmlDetectSAX2(ctxt);
13627 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013628 /* parsing in context, i.e. as within existing content */
Daniel Veillardad88b542017-12-08 09:42:31 +010013629 ctxt->input_id = 2;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013630 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013631
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013632 fake = xmlNewComment(NULL);
13633 if (fake == NULL) {
13634 xmlFreeParserCtxt(ctxt);
13635 return(XML_ERR_NO_MEMORY);
13636 }
13637 xmlAddChild(node, fake);
13638
Daniel Veillard29b17482004-08-16 00:39:03 +000013639 if (node->type == XML_ELEMENT_NODE) {
13640 nodePush(ctxt, node);
13641 /*
13642 * initialize the SAX2 namespaces stack
13643 */
13644 cur = node;
13645 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13646 xmlNsPtr ns = cur->nsDef;
13647 const xmlChar *iprefix, *ihref;
13648
13649 while (ns != NULL) {
13650 if (ctxt->dict) {
13651 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13652 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13653 } else {
13654 iprefix = ns->prefix;
13655 ihref = ns->href;
13656 }
13657
13658 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13659 nsPush(ctxt, iprefix, ihref);
13660 nsnr++;
13661 }
13662 ns = ns->next;
13663 }
13664 cur = cur->parent;
13665 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013666 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013667
13668 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13669 /*
13670 * ID/IDREF registration will be done in xmlValidateElement below
13671 */
13672 ctxt->loadsubset |= XML_SKIP_IDS;
13673 }
13674
Daniel Veillard499cc922006-01-18 17:22:35 +000013675#ifdef LIBXML_HTML_ENABLED
13676 if (doc->type == XML_HTML_DOCUMENT_NODE)
13677 __htmlParseContent(ctxt);
13678 else
13679#endif
13680 xmlParseContent(ctxt);
13681
Daniel Veillard29b17482004-08-16 00:39:03 +000013682 nsPop(ctxt, nsnr);
13683 if ((RAW == '<') && (NXT(1) == '/')) {
13684 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13685 } else if (RAW != 0) {
13686 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13687 }
13688 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13689 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13690 ctxt->wellFormed = 0;
13691 }
13692
13693 if (!ctxt->wellFormed) {
13694 if (ctxt->errNo == 0)
13695 ret = XML_ERR_INTERNAL_ERROR;
13696 else
13697 ret = (xmlParserErrors)ctxt->errNo;
13698 } else {
13699 ret = XML_ERR_OK;
13700 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013701
Daniel Veillard29b17482004-08-16 00:39:03 +000013702 /*
13703 * Return the newly created nodeset after unlinking it from
13704 * the pseudo sibling.
13705 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013706
Daniel Veillard29b17482004-08-16 00:39:03 +000013707 cur = fake->next;
13708 fake->next = NULL;
13709 node->last = fake;
13710
13711 if (cur != NULL) {
13712 cur->prev = NULL;
13713 }
13714
13715 *lst = cur;
13716
13717 while (cur != NULL) {
13718 cur->parent = NULL;
13719 cur = cur->next;
13720 }
13721
13722 xmlUnlinkNode(fake);
13723 xmlFreeNode(fake);
13724
13725
13726 if (ret != XML_ERR_OK) {
13727 xmlFreeNodeList(*lst);
13728 *lst = NULL;
13729 }
William M. Brackc3f81342004-10-03 01:22:44 +000013730
William M. Brackb7b54de2004-10-06 16:38:01 +000013731 if (doc->dict != NULL)
13732 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013733 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013734
Daniel Veillard29b17482004-08-16 00:39:03 +000013735 return(ret);
13736#else /* !SAX2 */
13737 return(XML_ERR_INTERNAL_ERROR);
13738#endif
13739}
13740
Daniel Veillard81273902003-09-30 00:43:48 +000013741#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013742/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013743 * xmlParseBalancedChunkMemoryRecover:
13744 * @doc: the document the chunk pertains to
13745 * @sax: the SAX handler bloc (possibly NULL)
13746 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13747 * @depth: Used for loop detection, use 0
13748 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13749 * @lst: the return value for the set of parsed nodes
13750 * @recover: return nodes even if the data is broken (use 0)
13751 *
13752 *
13753 * Parse a well-balanced chunk of an XML document
13754 * called by the parser
13755 * The allowed sequence for the Well Balanced Chunk is the one defined by
13756 * the content production in the XML grammar:
13757 *
13758 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13759 *
13760 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13761 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013762 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013763 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013764 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13765 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013766 */
13767int
13768xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013769 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013770 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013771 xmlParserCtxtPtr ctxt;
13772 xmlDocPtr newDoc;
13773 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013774 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013775 int size;
13776 int ret = 0;
13777
Daniel Veillard0161e632008-08-28 15:36:32 +000013778 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013779 return(XML_ERR_ENTITY_LOOP);
13780 }
13781
13782
Daniel Veillardcda96922001-08-21 10:56:31 +000013783 if (lst != NULL)
13784 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013785 if (string == NULL)
13786 return(-1);
13787
13788 size = xmlStrlen(string);
13789
13790 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13791 if (ctxt == NULL) return(-1);
13792 ctxt->userData = ctxt;
13793 if (sax != NULL) {
13794 oldsax = ctxt->sax;
13795 ctxt->sax = sax;
13796 if (user_data != NULL)
13797 ctxt->userData = user_data;
13798 }
13799 newDoc = xmlNewDoc(BAD_CAST "1.0");
13800 if (newDoc == NULL) {
13801 xmlFreeParserCtxt(ctxt);
13802 return(-1);
13803 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013804 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013805 if ((doc != NULL) && (doc->dict != NULL)) {
13806 xmlDictFree(ctxt->dict);
13807 ctxt->dict = doc->dict;
13808 xmlDictReference(ctxt->dict);
13809 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13810 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13811 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13812 ctxt->dictNames = 1;
13813 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013814 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013815 }
Owen Taylor3473f882001-02-23 17:55:21 +000013816 if (doc != NULL) {
13817 newDoc->intSubset = doc->intSubset;
13818 newDoc->extSubset = doc->extSubset;
13819 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013820 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13821 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013822 if (sax != NULL)
13823 ctxt->sax = oldsax;
13824 xmlFreeParserCtxt(ctxt);
13825 newDoc->intSubset = NULL;
13826 newDoc->extSubset = NULL;
13827 xmlFreeDoc(newDoc);
13828 return(-1);
13829 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013830 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13831 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013832 if (doc == NULL) {
13833 ctxt->myDoc = newDoc;
13834 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013835 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013836 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013837 /* Ensure that doc has XML spec namespace */
13838 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13839 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013840 }
13841 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardad88b542017-12-08 09:42:31 +010013842 ctxt->input_id = 2;
Owen Taylor3473f882001-02-23 17:55:21 +000013843 ctxt->depth = depth;
13844
13845 /*
13846 * Doing validity checking on chunk doesn't make sense
13847 */
13848 ctxt->validate = 0;
13849 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013850 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013851
Daniel Veillardb39bc392002-10-26 19:29:51 +000013852 if ( doc != NULL ){
13853 content = doc->children;
13854 doc->children = NULL;
13855 xmlParseContent(ctxt);
13856 doc->children = content;
13857 }
13858 else {
13859 xmlParseContent(ctxt);
13860 }
Owen Taylor3473f882001-02-23 17:55:21 +000013861 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013862 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013863 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013864 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013865 }
13866 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013867 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013868 }
13869
13870 if (!ctxt->wellFormed) {
13871 if (ctxt->errNo == 0)
13872 ret = 1;
13873 else
13874 ret = ctxt->errNo;
13875 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013876 ret = 0;
13877 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013878
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013879 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13880 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013881
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013882 /*
13883 * Return the newly created nodeset after unlinking it from
13884 * they pseudo parent.
13885 */
13886 cur = newDoc->children->children;
13887 *lst = cur;
13888 while (cur != NULL) {
13889 xmlSetTreeDoc(cur, doc);
13890 cur->parent = NULL;
13891 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013892 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013893 newDoc->children->children = NULL;
13894 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013895
13896 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013897 ctxt->sax = oldsax;
13898 xmlFreeParserCtxt(ctxt);
13899 newDoc->intSubset = NULL;
13900 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013901 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013902 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013903
Owen Taylor3473f882001-02-23 17:55:21 +000013904 return(ret);
13905}
13906
13907/**
13908 * xmlSAXParseEntity:
13909 * @sax: the SAX handler block
13910 * @filename: the filename
13911 *
13912 * parse an XML external entity out of context and build a tree.
13913 * It use the given SAX function block to handle the parsing callback.
13914 * If sax is NULL, fallback to the default DOM tree building routines.
13915 *
13916 * [78] extParsedEnt ::= TextDecl? content
13917 *
13918 * This correspond to a "Well Balanced" chunk
13919 *
13920 * Returns the resulting document tree
13921 */
13922
13923xmlDocPtr
13924xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13925 xmlDocPtr ret;
13926 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013927
13928 ctxt = xmlCreateFileParserCtxt(filename);
13929 if (ctxt == NULL) {
13930 return(NULL);
13931 }
13932 if (sax != NULL) {
13933 if (ctxt->sax != NULL)
13934 xmlFree(ctxt->sax);
13935 ctxt->sax = sax;
13936 ctxt->userData = NULL;
13937 }
13938
Owen Taylor3473f882001-02-23 17:55:21 +000013939 xmlParseExtParsedEnt(ctxt);
13940
13941 if (ctxt->wellFormed)
13942 ret = ctxt->myDoc;
13943 else {
13944 ret = NULL;
13945 xmlFreeDoc(ctxt->myDoc);
13946 ctxt->myDoc = NULL;
13947 }
13948 if (sax != NULL)
13949 ctxt->sax = NULL;
13950 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013951
Owen Taylor3473f882001-02-23 17:55:21 +000013952 return(ret);
13953}
13954
13955/**
13956 * xmlParseEntity:
13957 * @filename: the filename
13958 *
13959 * parse an XML external entity out of context and build a tree.
13960 *
13961 * [78] extParsedEnt ::= TextDecl? content
13962 *
13963 * This correspond to a "Well Balanced" chunk
13964 *
13965 * Returns the resulting document tree
13966 */
13967
13968xmlDocPtr
13969xmlParseEntity(const char *filename) {
13970 return(xmlSAXParseEntity(NULL, filename));
13971}
Daniel Veillard81273902003-09-30 00:43:48 +000013972#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013973
13974/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013975 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013976 * @URL: the entity URL
13977 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013978 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013979 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013980 *
13981 * Create a parser context for an external entity
13982 * Automatic support for ZLIB/Compress compressed document is provided
13983 * by default if found at compile-time.
13984 *
13985 * Returns the new parser context or NULL
13986 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013987static xmlParserCtxtPtr
13988xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13989 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013990 xmlParserCtxtPtr ctxt;
13991 xmlParserInputPtr inputStream;
13992 char *directory = NULL;
13993 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013994
Owen Taylor3473f882001-02-23 17:55:21 +000013995 ctxt = xmlNewParserCtxt();
13996 if (ctxt == NULL) {
13997 return(NULL);
13998 }
13999
Daniel Veillard48247b42009-07-10 16:12:46 +020014000 if (pctx != NULL) {
14001 ctxt->options = pctx->options;
14002 ctxt->_private = pctx->_private;
Daniel Veillardad88b542017-12-08 09:42:31 +010014003 /*
14004 * this is a subparser of pctx, so the input_id should be
14005 * incremented to distinguish from main entity
14006 */
14007 ctxt->input_id = pctx->input_id + 1;
Rob Richards9c0aa472009-03-26 18:10:19 +000014008 }
14009
Owen Taylor3473f882001-02-23 17:55:21 +000014010 uri = xmlBuildURI(URL, base);
14011
14012 if (uri == NULL) {
14013 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14014 if (inputStream == NULL) {
14015 xmlFreeParserCtxt(ctxt);
14016 return(NULL);
14017 }
14018
14019 inputPush(ctxt, inputStream);
14020
14021 if ((ctxt->directory == NULL) && (directory == NULL))
14022 directory = xmlParserGetDirectory((char *)URL);
14023 if ((ctxt->directory == NULL) && (directory != NULL))
14024 ctxt->directory = directory;
14025 } else {
14026 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14027 if (inputStream == NULL) {
14028 xmlFree(uri);
14029 xmlFreeParserCtxt(ctxt);
14030 return(NULL);
14031 }
14032
14033 inputPush(ctxt, inputStream);
14034
14035 if ((ctxt->directory == NULL) && (directory == NULL))
14036 directory = xmlParserGetDirectory((char *)uri);
14037 if ((ctxt->directory == NULL) && (directory != NULL))
14038 ctxt->directory = directory;
14039 xmlFree(uri);
14040 }
Owen Taylor3473f882001-02-23 17:55:21 +000014041 return(ctxt);
14042}
14043
Rob Richards9c0aa472009-03-26 18:10:19 +000014044/**
14045 * xmlCreateEntityParserCtxt:
14046 * @URL: the entity URL
14047 * @ID: the entity PUBLIC ID
14048 * @base: a possible base for the target URI
14049 *
14050 * Create a parser context for an external entity
14051 * Automatic support for ZLIB/Compress compressed document is provided
14052 * by default if found at compile-time.
14053 *
14054 * Returns the new parser context or NULL
14055 */
14056xmlParserCtxtPtr
14057xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14058 const xmlChar *base) {
14059 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14060
14061}
14062
Owen Taylor3473f882001-02-23 17:55:21 +000014063/************************************************************************
14064 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014065 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014066 * *
14067 ************************************************************************/
14068
14069/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014070 * xmlCreateURLParserCtxt:
14071 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014072 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014073 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014074 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014075 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014076 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014077 *
14078 * Returns the new parser context or NULL
14079 */
14080xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014081xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014082{
14083 xmlParserCtxtPtr ctxt;
14084 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014085 char *directory = NULL;
14086
Owen Taylor3473f882001-02-23 17:55:21 +000014087 ctxt = xmlNewParserCtxt();
14088 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014089 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014090 return(NULL);
14091 }
14092
Daniel Veillarddf292f72005-01-16 19:00:15 +000014093 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014094 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014095 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014096
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014097 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014098 if (inputStream == NULL) {
14099 xmlFreeParserCtxt(ctxt);
14100 return(NULL);
14101 }
14102
Owen Taylor3473f882001-02-23 17:55:21 +000014103 inputPush(ctxt, inputStream);
14104 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014105 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014106 if ((ctxt->directory == NULL) && (directory != NULL))
14107 ctxt->directory = directory;
14108
14109 return(ctxt);
14110}
14111
Daniel Veillard61b93382003-11-03 14:28:31 +000014112/**
14113 * xmlCreateFileParserCtxt:
14114 * @filename: the filename
14115 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014116 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014117 * Automatic support for ZLIB/Compress compressed document is provided
14118 * by default if found at compile-time.
14119 *
14120 * Returns the new parser context or NULL
14121 */
14122xmlParserCtxtPtr
14123xmlCreateFileParserCtxt(const char *filename)
14124{
14125 return(xmlCreateURLParserCtxt(filename, 0));
14126}
14127
Daniel Veillard81273902003-09-30 00:43:48 +000014128#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014129/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014130 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014131 * @sax: the SAX handler block
14132 * @filename: the filename
14133 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14134 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014135 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014136 *
14137 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14138 * compressed document is provided by default if found at compile-time.
14139 * It use the given SAX function block to handle the parsing callback.
14140 * If sax is NULL, fallback to the default DOM tree building routines.
14141 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014142 * User data (void *) is stored within the parser context in the
14143 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014144 *
Owen Taylor3473f882001-02-23 17:55:21 +000014145 * Returns the resulting document tree
14146 */
14147
14148xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014149xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14150 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014151 xmlDocPtr ret;
14152 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014153
Daniel Veillard635ef722001-10-29 11:48:19 +000014154 xmlInitParser();
14155
Owen Taylor3473f882001-02-23 17:55:21 +000014156 ctxt = xmlCreateFileParserCtxt(filename);
14157 if (ctxt == NULL) {
14158 return(NULL);
14159 }
14160 if (sax != NULL) {
14161 if (ctxt->sax != NULL)
14162 xmlFree(ctxt->sax);
14163 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014164 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014165 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014166 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014167 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014168 }
Owen Taylor3473f882001-02-23 17:55:21 +000014169
Daniel Veillard37d2d162008-03-14 10:54:00 +000014170 if (ctxt->directory == NULL)
14171 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014172
Daniel Veillarddad3f682002-11-17 16:47:27 +000014173 ctxt->recovery = recovery;
14174
Owen Taylor3473f882001-02-23 17:55:21 +000014175 xmlParseDocument(ctxt);
14176
William M. Brackc07329e2003-09-08 01:57:30 +000014177 if ((ctxt->wellFormed) || recovery) {
14178 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014179 if (ret != NULL) {
14180 if (ctxt->input->buf->compressed > 0)
14181 ret->compression = 9;
14182 else
14183 ret->compression = ctxt->input->buf->compressed;
14184 }
William M. Brackc07329e2003-09-08 01:57:30 +000014185 }
Owen Taylor3473f882001-02-23 17:55:21 +000014186 else {
14187 ret = NULL;
14188 xmlFreeDoc(ctxt->myDoc);
14189 ctxt->myDoc = NULL;
14190 }
14191 if (sax != NULL)
14192 ctxt->sax = NULL;
14193 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014194
Owen Taylor3473f882001-02-23 17:55:21 +000014195 return(ret);
14196}
14197
14198/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014199 * xmlSAXParseFile:
14200 * @sax: the SAX handler block
14201 * @filename: the filename
14202 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14203 * documents
14204 *
14205 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14206 * compressed document is provided by default if found at compile-time.
14207 * It use the given SAX function block to handle the parsing callback.
14208 * If sax is NULL, fallback to the default DOM tree building routines.
14209 *
14210 * Returns the resulting document tree
14211 */
14212
14213xmlDocPtr
14214xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14215 int recovery) {
14216 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14217}
14218
14219/**
Owen Taylor3473f882001-02-23 17:55:21 +000014220 * xmlRecoverDoc:
14221 * @cur: a pointer to an array of xmlChar
14222 *
14223 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014224 * In the case the document is not Well Formed, a attempt to build a
14225 * tree is tried anyway
14226 *
14227 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014228 */
14229
14230xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014231xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014232 return(xmlSAXParseDoc(NULL, cur, 1));
14233}
14234
14235/**
14236 * xmlParseFile:
14237 * @filename: the filename
14238 *
14239 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14240 * compressed document is provided by default if found at compile-time.
14241 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014242 * Returns the resulting document tree if the file was wellformed,
14243 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014244 */
14245
14246xmlDocPtr
14247xmlParseFile(const char *filename) {
14248 return(xmlSAXParseFile(NULL, filename, 0));
14249}
14250
14251/**
14252 * xmlRecoverFile:
14253 * @filename: the filename
14254 *
14255 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14256 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014257 * In the case the document is not Well Formed, it attempts to build
14258 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014259 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014260 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014261 */
14262
14263xmlDocPtr
14264xmlRecoverFile(const char *filename) {
14265 return(xmlSAXParseFile(NULL, filename, 1));
14266}
14267
14268
14269/**
14270 * xmlSetupParserForBuffer:
14271 * @ctxt: an XML parser context
14272 * @buffer: a xmlChar * buffer
14273 * @filename: a file name
14274 *
14275 * Setup the parser context to parse a new buffer; Clears any prior
14276 * contents from the parser context. The buffer parameter must not be
14277 * NULL, but the filename parameter can be
14278 */
14279void
14280xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14281 const char* filename)
14282{
14283 xmlParserInputPtr input;
14284
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014285 if ((ctxt == NULL) || (buffer == NULL))
14286 return;
14287
Owen Taylor3473f882001-02-23 17:55:21 +000014288 input = xmlNewInputStream(ctxt);
14289 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014290 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014291 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014292 return;
14293 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014294
Owen Taylor3473f882001-02-23 17:55:21 +000014295 xmlClearParserCtxt(ctxt);
14296 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014297 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014298 input->base = buffer;
14299 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014300 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014301 inputPush(ctxt, input);
14302}
14303
14304/**
14305 * xmlSAXUserParseFile:
14306 * @sax: a SAX handler
14307 * @user_data: The user data returned on SAX callbacks
14308 * @filename: a file name
14309 *
14310 * parse an XML file and call the given SAX handler routines.
14311 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014312 *
Owen Taylor3473f882001-02-23 17:55:21 +000014313 * Returns 0 in case of success or a error number otherwise
14314 */
14315int
14316xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14317 const char *filename) {
14318 int ret = 0;
14319 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014320
Owen Taylor3473f882001-02-23 17:55:21 +000014321 ctxt = xmlCreateFileParserCtxt(filename);
14322 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014323 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014324 xmlFree(ctxt->sax);
14325 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014326 xmlDetectSAX2(ctxt);
14327
Owen Taylor3473f882001-02-23 17:55:21 +000014328 if (user_data != NULL)
14329 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014330
Owen Taylor3473f882001-02-23 17:55:21 +000014331 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014332
Owen Taylor3473f882001-02-23 17:55:21 +000014333 if (ctxt->wellFormed)
14334 ret = 0;
14335 else {
14336 if (ctxt->errNo != 0)
14337 ret = ctxt->errNo;
14338 else
14339 ret = -1;
14340 }
14341 if (sax != NULL)
14342 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014343 if (ctxt->myDoc != NULL) {
14344 xmlFreeDoc(ctxt->myDoc);
14345 ctxt->myDoc = NULL;
14346 }
Owen Taylor3473f882001-02-23 17:55:21 +000014347 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014348
Owen Taylor3473f882001-02-23 17:55:21 +000014349 return ret;
14350}
Daniel Veillard81273902003-09-30 00:43:48 +000014351#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014352
14353/************************************************************************
14354 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014355 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014356 * *
14357 ************************************************************************/
14358
14359/**
14360 * xmlCreateMemoryParserCtxt:
14361 * @buffer: a pointer to a char array
14362 * @size: the size of the array
14363 *
14364 * Create a parser context for an XML in-memory document.
14365 *
14366 * Returns the new parser context or NULL
14367 */
14368xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014369xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014370 xmlParserCtxtPtr ctxt;
14371 xmlParserInputPtr input;
14372 xmlParserInputBufferPtr buf;
14373
14374 if (buffer == NULL)
14375 return(NULL);
14376 if (size <= 0)
14377 return(NULL);
14378
14379 ctxt = xmlNewParserCtxt();
14380 if (ctxt == NULL)
14381 return(NULL);
14382
Daniel Veillard53350552003-09-18 13:35:51 +000014383 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014384 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014385 if (buf == NULL) {
14386 xmlFreeParserCtxt(ctxt);
14387 return(NULL);
14388 }
Owen Taylor3473f882001-02-23 17:55:21 +000014389
14390 input = xmlNewInputStream(ctxt);
14391 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014392 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014393 xmlFreeParserCtxt(ctxt);
14394 return(NULL);
14395 }
14396
14397 input->filename = NULL;
14398 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014399 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014400
14401 inputPush(ctxt, input);
14402 return(ctxt);
14403}
14404
Daniel Veillard81273902003-09-30 00:43:48 +000014405#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014406/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014407 * xmlSAXParseMemoryWithData:
14408 * @sax: the SAX handler block
14409 * @buffer: an pointer to a char array
14410 * @size: the size of the array
14411 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14412 * documents
14413 * @data: the userdata
14414 *
14415 * parse an XML in-memory block and use the given SAX function block
14416 * to handle the parsing callback. If sax is NULL, fallback to the default
14417 * DOM tree building routines.
14418 *
14419 * User data (void *) is stored within the parser context in the
14420 * context's _private member, so it is available nearly everywhere in libxml
14421 *
14422 * Returns the resulting document tree
14423 */
14424
14425xmlDocPtr
14426xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14427 int size, int recovery, void *data) {
14428 xmlDocPtr ret;
14429 xmlParserCtxtPtr ctxt;
14430
Daniel Veillardab2a7632009-07-09 08:45:03 +020014431 xmlInitParser();
14432
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014433 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14434 if (ctxt == NULL) return(NULL);
14435 if (sax != NULL) {
14436 if (ctxt->sax != NULL)
14437 xmlFree(ctxt->sax);
14438 ctxt->sax = sax;
14439 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014440 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014441 if (data!=NULL) {
14442 ctxt->_private=data;
14443 }
14444
Daniel Veillardadba5f12003-04-04 16:09:01 +000014445 ctxt->recovery = recovery;
14446
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014447 xmlParseDocument(ctxt);
14448
14449 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14450 else {
14451 ret = NULL;
14452 xmlFreeDoc(ctxt->myDoc);
14453 ctxt->myDoc = NULL;
14454 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014455 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014456 ctxt->sax = NULL;
14457 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014458
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014459 return(ret);
14460}
14461
14462/**
Owen Taylor3473f882001-02-23 17:55:21 +000014463 * xmlSAXParseMemory:
14464 * @sax: the SAX handler block
14465 * @buffer: an pointer to a char array
14466 * @size: the size of the array
14467 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14468 * documents
14469 *
14470 * parse an XML in-memory block and use the given SAX function block
14471 * to handle the parsing callback. If sax is NULL, fallback to the default
14472 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014473 *
Owen Taylor3473f882001-02-23 17:55:21 +000014474 * Returns the resulting document tree
14475 */
14476xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014477xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14478 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014479 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014480}
14481
14482/**
14483 * xmlParseMemory:
14484 * @buffer: an pointer to a char array
14485 * @size: the size of the array
14486 *
14487 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014488 *
Owen Taylor3473f882001-02-23 17:55:21 +000014489 * Returns the resulting document tree
14490 */
14491
Daniel Veillard50822cb2001-07-26 20:05:51 +000014492xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014493 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14494}
14495
14496/**
14497 * xmlRecoverMemory:
14498 * @buffer: an pointer to a char array
14499 * @size: the size of the array
14500 *
14501 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014502 * In the case the document is not Well Formed, an attempt to
14503 * build a tree is tried anyway
14504 *
14505 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014506 */
14507
Daniel Veillard50822cb2001-07-26 20:05:51 +000014508xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014509 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14510}
14511
14512/**
14513 * xmlSAXUserParseMemory:
14514 * @sax: a SAX handler
14515 * @user_data: The user data returned on SAX callbacks
14516 * @buffer: an in-memory XML document input
14517 * @size: the length of the XML document in bytes
14518 *
14519 * A better SAX parsing routine.
14520 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014521 *
Owen Taylor3473f882001-02-23 17:55:21 +000014522 * Returns 0 in case of success or a error number otherwise
14523 */
14524int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014525 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014526 int ret = 0;
14527 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014528
14529 xmlInitParser();
14530
Owen Taylor3473f882001-02-23 17:55:21 +000014531 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14532 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014533 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14534 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014535 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014536 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014537
Daniel Veillard30211a02001-04-26 09:33:18 +000014538 if (user_data != NULL)
14539 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014540
Owen Taylor3473f882001-02-23 17:55:21 +000014541 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014542
Owen Taylor3473f882001-02-23 17:55:21 +000014543 if (ctxt->wellFormed)
14544 ret = 0;
14545 else {
14546 if (ctxt->errNo != 0)
14547 ret = ctxt->errNo;
14548 else
14549 ret = -1;
14550 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014551 if (sax != NULL)
14552 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014553 if (ctxt->myDoc != NULL) {
14554 xmlFreeDoc(ctxt->myDoc);
14555 ctxt->myDoc = NULL;
14556 }
Owen Taylor3473f882001-02-23 17:55:21 +000014557 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014558
Owen Taylor3473f882001-02-23 17:55:21 +000014559 return ret;
14560}
Daniel Veillard81273902003-09-30 00:43:48 +000014561#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014562
14563/**
14564 * xmlCreateDocParserCtxt:
14565 * @cur: a pointer to an array of xmlChar
14566 *
14567 * Creates a parser context for an XML in-memory document.
14568 *
14569 * Returns the new parser context or NULL
14570 */
14571xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014572xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014573 int len;
14574
14575 if (cur == NULL)
14576 return(NULL);
14577 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014578 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014579}
14580
Daniel Veillard81273902003-09-30 00:43:48 +000014581#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014582/**
14583 * xmlSAXParseDoc:
14584 * @sax: the SAX handler block
14585 * @cur: a pointer to an array of xmlChar
14586 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14587 * documents
14588 *
14589 * parse an XML in-memory document and build a tree.
14590 * It use the given SAX function block to handle the parsing callback.
14591 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014592 *
Owen Taylor3473f882001-02-23 17:55:21 +000014593 * Returns the resulting document tree
14594 */
14595
14596xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014597xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014598 xmlDocPtr ret;
14599 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014600 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014601
Daniel Veillard38936062004-11-04 17:45:11 +000014602 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014603
14604
14605 ctxt = xmlCreateDocParserCtxt(cur);
14606 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014607 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014608 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014609 ctxt->sax = sax;
14610 ctxt->userData = NULL;
14611 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014612 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014613
14614 xmlParseDocument(ctxt);
14615 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14616 else {
14617 ret = NULL;
14618 xmlFreeDoc(ctxt->myDoc);
14619 ctxt->myDoc = NULL;
14620 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014621 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014622 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014623 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014624
Owen Taylor3473f882001-02-23 17:55:21 +000014625 return(ret);
14626}
14627
14628/**
14629 * xmlParseDoc:
14630 * @cur: a pointer to an array of xmlChar
14631 *
14632 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014633 *
Owen Taylor3473f882001-02-23 17:55:21 +000014634 * Returns the resulting document tree
14635 */
14636
14637xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014638xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014639 return(xmlSAXParseDoc(NULL, cur, 0));
14640}
Daniel Veillard81273902003-09-30 00:43:48 +000014641#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014642
Daniel Veillard81273902003-09-30 00:43:48 +000014643#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014644/************************************************************************
14645 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014646 * Specific function to keep track of entities references *
14647 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014648 * *
14649 ************************************************************************/
14650
14651static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14652
14653/**
14654 * xmlAddEntityReference:
14655 * @ent : A valid entity
14656 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014657 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014658 *
14659 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14660 */
14661static void
14662xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14663 xmlNodePtr lastNode)
14664{
14665 if (xmlEntityRefFunc != NULL) {
14666 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14667 }
14668}
14669
14670
14671/**
14672 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014673 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014674 *
14675 * Set the function to call call back when a xml reference has been made
14676 */
14677void
14678xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14679{
14680 xmlEntityRefFunc = func;
14681}
Daniel Veillard81273902003-09-30 00:43:48 +000014682#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014683
14684/************************************************************************
14685 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014686 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014687 * *
14688 ************************************************************************/
14689
14690#ifdef LIBXML_XPATH_ENABLED
14691#include <libxml/xpath.h>
14692#endif
14693
Daniel Veillardffa3c742005-07-21 13:24:09 +000014694extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014695static int xmlParserInitialized = 0;
14696
14697/**
14698 * xmlInitParser:
14699 *
14700 * Initialization function for the XML parser.
14701 * This is not reentrant. Call once before processing in case of
14702 * use in multithreaded programs.
14703 */
14704
14705void
14706xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014707 if (xmlParserInitialized != 0)
14708 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014709
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014710#ifdef LIBXML_THREAD_ENABLED
14711 __xmlGlobalInitMutexLock();
14712 if (xmlParserInitialized == 0) {
14713#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014714 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014715 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014716 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14717 (xmlGenericError == NULL))
14718 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014719 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014720 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014721 xmlInitCharEncodingHandlers();
14722 xmlDefaultSAXHandlerInit();
14723 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014724#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014725 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014726#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014727#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014728 htmlInitAutoClose();
14729 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014730#endif
14731#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014732 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014733#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014734 xmlParserInitialized = 1;
14735#ifdef LIBXML_THREAD_ENABLED
14736 }
14737 __xmlGlobalInitMutexUnlock();
14738#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014739}
14740
14741/**
14742 * xmlCleanupParser:
14743 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014744 * This function name is somewhat misleading. It does not clean up
14745 * parser state, it cleans up memory allocated by the library itself.
14746 * It is a cleanup function for the XML library. It tries to reclaim all
14747 * related global memory allocated for the library processing.
14748 * It doesn't deallocate any document related memory. One should
14749 * call xmlCleanupParser() only when the process has finished using
14750 * the library and all XML/HTML documents built with it.
14751 * See also xmlInitParser() which has the opposite function of preparing
14752 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014753 *
14754 * WARNING: if your application is multithreaded or has plugin support
14755 * calling this may crash the application if another thread or
14756 * a plugin is still using libxml2. It's sometimes very hard to
14757 * guess if libxml2 is in use in the application, some libraries
14758 * or plugins may use it without notice. In case of doubt abstain
14759 * from calling this function or do it just before calling exit()
14760 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014761 */
14762
14763void
14764xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014765 if (!xmlParserInitialized)
14766 return;
14767
Owen Taylor3473f882001-02-23 17:55:21 +000014768 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014769#ifdef LIBXML_CATALOG_ENABLED
14770 xmlCatalogCleanup();
14771#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014772 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014773 xmlCleanupInputCallbacks();
14774#ifdef LIBXML_OUTPUT_ENABLED
14775 xmlCleanupOutputCallbacks();
14776#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014777#ifdef LIBXML_SCHEMAS_ENABLED
14778 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014779 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014780#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014781 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014782 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014783 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014784 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014785 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014786}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014787
14788/************************************************************************
14789 * *
14790 * New set (2.6.0) of simpler and more flexible APIs *
14791 * *
14792 ************************************************************************/
14793
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used; a NULL "dict" means the string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro acts
 * as a single statement and must be followed by a semicolon, which
 * keeps it safe inside unbraced if/else bodies.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14805
14806/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014807 * xmlCtxtReset:
14808 * @ctxt: an XML parser context
14809 *
14810 * Reset a parser context
14811 */
14812void
14813xmlCtxtReset(xmlParserCtxtPtr ctxt)
14814{
14815 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014816 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014817
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014818 if (ctxt == NULL)
14819 return;
14820
14821 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014822
14823 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14824 xmlFreeInputStream(input);
14825 }
14826 ctxt->inputNr = 0;
14827 ctxt->input = NULL;
14828
14829 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014830 if (ctxt->spaceTab != NULL) {
14831 ctxt->spaceTab[0] = -1;
14832 ctxt->space = &ctxt->spaceTab[0];
14833 } else {
14834 ctxt->space = NULL;
14835 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014836
14837
14838 ctxt->nodeNr = 0;
14839 ctxt->node = NULL;
14840
14841 ctxt->nameNr = 0;
14842 ctxt->name = NULL;
14843
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014844 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014845 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014846 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014847 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014848 DICT_FREE(ctxt->directory);
14849 ctxt->directory = NULL;
14850 DICT_FREE(ctxt->extSubURI);
14851 ctxt->extSubURI = NULL;
14852 DICT_FREE(ctxt->extSubSystem);
14853 ctxt->extSubSystem = NULL;
14854 if (ctxt->myDoc != NULL)
14855 xmlFreeDoc(ctxt->myDoc);
14856 ctxt->myDoc = NULL;
14857
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014858 ctxt->standalone = -1;
14859 ctxt->hasExternalSubset = 0;
14860 ctxt->hasPErefs = 0;
14861 ctxt->html = 0;
14862 ctxt->external = 0;
14863 ctxt->instate = XML_PARSER_START;
14864 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014865
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014866 ctxt->wellFormed = 1;
14867 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014868 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014869 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014870#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014871 ctxt->vctxt.userData = ctxt;
14872 ctxt->vctxt.error = xmlParserValidityError;
14873 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014874#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014875 ctxt->record_info = 0;
14876 ctxt->nbChars = 0;
14877 ctxt->checkIndex = 0;
14878 ctxt->inSubset = 0;
14879 ctxt->errNo = XML_ERR_OK;
14880 ctxt->depth = 0;
14881 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14882 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014883 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014884 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014885 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014886 xmlInitNodeInfoSeq(&ctxt->node_seq);
14887
14888 if (ctxt->attsDefault != NULL) {
Nick Wellnhofere03f0a12017-11-09 16:42:47 +010014889 xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014890 ctxt->attsDefault = NULL;
14891 }
14892 if (ctxt->attsSpecial != NULL) {
14893 xmlHashFree(ctxt->attsSpecial, NULL);
14894 ctxt->attsSpecial = NULL;
14895 }
14896
Daniel Veillard4432df22003-09-28 18:58:27 +000014897#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014898 if (ctxt->catalogs != NULL)
14899 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014900#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014901 if (ctxt->lastError.code != XML_ERR_OK)
14902 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014903}
14904
14905/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014906 * xmlCtxtResetPush:
14907 * @ctxt: an XML parser context
14908 * @chunk: a pointer to an array of chars
14909 * @size: number of chars in the array
14910 * @filename: an optional file name or URI
14911 * @encoding: the document encoding, or NULL
14912 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014913 * Reset a push parser context
14914 *
14915 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014916 */
14917int
14918xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14919 int size, const char *filename, const char *encoding)
14920{
14921 xmlParserInputPtr inputStream;
14922 xmlParserInputBufferPtr buf;
14923 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14924
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014925 if (ctxt == NULL)
14926 return(1);
14927
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014928 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14929 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14930
14931 buf = xmlAllocParserInputBuffer(enc);
14932 if (buf == NULL)
14933 return(1);
14934
14935 if (ctxt == NULL) {
14936 xmlFreeParserInputBuffer(buf);
14937 return(1);
14938 }
14939
14940 xmlCtxtReset(ctxt);
14941
14942 if (ctxt->pushTab == NULL) {
14943 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14944 sizeof(xmlChar *));
14945 if (ctxt->pushTab == NULL) {
14946 xmlErrMemory(ctxt, NULL);
14947 xmlFreeParserInputBuffer(buf);
14948 return(1);
14949 }
14950 }
14951
14952 if (filename == NULL) {
14953 ctxt->directory = NULL;
14954 } else {
14955 ctxt->directory = xmlParserGetDirectory(filename);
14956 }
14957
14958 inputStream = xmlNewInputStream(ctxt);
14959 if (inputStream == NULL) {
14960 xmlFreeParserInputBuffer(buf);
14961 return(1);
14962 }
14963
14964 if (filename == NULL)
14965 inputStream->filename = NULL;
14966 else
14967 inputStream->filename = (char *)
14968 xmlCanonicPath((const xmlChar *) filename);
14969 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014970 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014971
14972 inputPush(ctxt, inputStream);
14973
14974 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14975 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014976 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14977 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014978
14979 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14980
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014981 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014982#ifdef DEBUG_PUSH
14983 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14984#endif
14985 }
14986
14987 if (encoding != NULL) {
14988 xmlCharEncodingHandlerPtr hdlr;
14989
Daniel Veillard37334572008-07-31 08:20:02 +000014990 if (ctxt->encoding != NULL)
14991 xmlFree((xmlChar *) ctxt->encoding);
14992 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14993
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014994 hdlr = xmlFindCharEncodingHandler(encoding);
14995 if (hdlr != NULL) {
14996 xmlSwitchToEncoding(ctxt, hdlr);
14997 } else {
14998 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14999 "Unsupported encoding %s\n", BAD_CAST encoding);
15000 }
15001 } else if (enc != XML_CHAR_ENCODING_NONE) {
15002 xmlSwitchEncoding(ctxt, enc);
15003 }
15004
15005 return(0);
15006}
15007
Daniel Veillard37334572008-07-31 08:20:02 +000015008
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015009/**
Daniel Veillard37334572008-07-31 08:20:02 +000015010 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015011 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015012 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015013 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015014 *
15015 * Applies the options to the parser context
15016 *
15017 * Returns 0 in case of success, the set of unknown or unimplemented options
15018 * in case of error.
15019 */
Daniel Veillard37334572008-07-31 08:20:02 +000015020static int
15021xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015022{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015023 if (ctxt == NULL)
15024 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015025 if (encoding != NULL) {
15026 if (ctxt->encoding != NULL)
15027 xmlFree((xmlChar *) ctxt->encoding);
15028 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15029 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015030 if (options & XML_PARSE_RECOVER) {
15031 ctxt->recovery = 1;
15032 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015033 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015034 } else
15035 ctxt->recovery = 0;
15036 if (options & XML_PARSE_DTDLOAD) {
15037 ctxt->loadsubset = XML_DETECT_IDS;
15038 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015039 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015040 } else
15041 ctxt->loadsubset = 0;
15042 if (options & XML_PARSE_DTDATTR) {
15043 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15044 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015045 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015046 }
15047 if (options & XML_PARSE_NOENT) {
15048 ctxt->replaceEntities = 1;
15049 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15050 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015051 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015052 } else
15053 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015054 if (options & XML_PARSE_PEDANTIC) {
15055 ctxt->pedantic = 1;
15056 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015057 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015058 } else
15059 ctxt->pedantic = 0;
15060 if (options & XML_PARSE_NOBLANKS) {
15061 ctxt->keepBlanks = 0;
15062 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15063 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015064 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015065 } else
15066 ctxt->keepBlanks = 1;
15067 if (options & XML_PARSE_DTDVALID) {
15068 ctxt->validate = 1;
15069 if (options & XML_PARSE_NOWARNING)
15070 ctxt->vctxt.warning = NULL;
15071 if (options & XML_PARSE_NOERROR)
15072 ctxt->vctxt.error = NULL;
15073 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015074 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015075 } else
15076 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015077 if (options & XML_PARSE_NOWARNING) {
15078 ctxt->sax->warning = NULL;
15079 options -= XML_PARSE_NOWARNING;
15080 }
15081 if (options & XML_PARSE_NOERROR) {
15082 ctxt->sax->error = NULL;
15083 ctxt->sax->fatalError = NULL;
15084 options -= XML_PARSE_NOERROR;
15085 }
Daniel Veillard81273902003-09-30 00:43:48 +000015086#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015087 if (options & XML_PARSE_SAX1) {
15088 ctxt->sax->startElement = xmlSAX2StartElement;
15089 ctxt->sax->endElement = xmlSAX2EndElement;
15090 ctxt->sax->startElementNs = NULL;
15091 ctxt->sax->endElementNs = NULL;
15092 ctxt->sax->initialized = 1;
15093 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015094 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015095 }
Daniel Veillard81273902003-09-30 00:43:48 +000015096#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015097 if (options & XML_PARSE_NODICT) {
15098 ctxt->dictNames = 0;
15099 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015100 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015101 } else {
15102 ctxt->dictNames = 1;
15103 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015104 if (options & XML_PARSE_NOCDATA) {
15105 ctxt->sax->cdataBlock = NULL;
15106 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015107 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015108 }
15109 if (options & XML_PARSE_NSCLEAN) {
15110 ctxt->options |= XML_PARSE_NSCLEAN;
15111 options -= XML_PARSE_NSCLEAN;
15112 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015113 if (options & XML_PARSE_NONET) {
15114 ctxt->options |= XML_PARSE_NONET;
15115 options -= XML_PARSE_NONET;
15116 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015117 if (options & XML_PARSE_COMPACT) {
15118 ctxt->options |= XML_PARSE_COMPACT;
15119 options -= XML_PARSE_COMPACT;
15120 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015121 if (options & XML_PARSE_OLD10) {
15122 ctxt->options |= XML_PARSE_OLD10;
15123 options -= XML_PARSE_OLD10;
15124 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015125 if (options & XML_PARSE_NOBASEFIX) {
15126 ctxt->options |= XML_PARSE_NOBASEFIX;
15127 options -= XML_PARSE_NOBASEFIX;
15128 }
15129 if (options & XML_PARSE_HUGE) {
15130 ctxt->options |= XML_PARSE_HUGE;
15131 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015132 if (ctxt->dict != NULL)
15133 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015134 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015135 if (options & XML_PARSE_OLDSAX) {
15136 ctxt->options |= XML_PARSE_OLDSAX;
15137 options -= XML_PARSE_OLDSAX;
15138 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015139 if (options & XML_PARSE_IGNORE_ENC) {
15140 ctxt->options |= XML_PARSE_IGNORE_ENC;
15141 options -= XML_PARSE_IGNORE_ENC;
15142 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015143 if (options & XML_PARSE_BIG_LINES) {
15144 ctxt->options |= XML_PARSE_BIG_LINES;
15145 options -= XML_PARSE_BIG_LINES;
15146 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015147 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015148 return (options);
15149}
15150
15151/**
Daniel Veillard37334572008-07-31 08:20:02 +000015152 * xmlCtxtUseOptions:
15153 * @ctxt: an XML parser context
15154 * @options: a combination of xmlParserOption
15155 *
15156 * Applies the options to the parser context
15157 *
15158 * Returns 0 in case of success, the set of unknown or unimplemented options
15159 * in case of error.
15160 */
15161int
15162xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15163{
15164 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15165}
15166
15167/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015168 * xmlDoRead:
15169 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015170 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015171 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015172 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015173 * @reuse: keep the context for reuse
15174 *
15175 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015176 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015177 * Returns the resulting document tree or NULL
15178 */
15179static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015180xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15181 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015182{
15183 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015184
15185 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015186 if (encoding != NULL) {
15187 xmlCharEncodingHandlerPtr hdlr;
15188
15189 hdlr = xmlFindCharEncodingHandler(encoding);
15190 if (hdlr != NULL)
15191 xmlSwitchToEncoding(ctxt, hdlr);
15192 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015193 if ((URL != NULL) && (ctxt->input != NULL) &&
15194 (ctxt->input->filename == NULL))
15195 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015196 xmlParseDocument(ctxt);
15197 if ((ctxt->wellFormed) || ctxt->recovery)
15198 ret = ctxt->myDoc;
15199 else {
15200 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015201 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015202 xmlFreeDoc(ctxt->myDoc);
15203 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015204 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015205 ctxt->myDoc = NULL;
15206 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015207 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015208 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015209
15210 return (ret);
15211}
15212
15213/**
15214 * xmlReadDoc:
15215 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015216 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015217 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015218 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015219 *
15220 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015221 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015222 * Returns the resulting document tree
15223 */
15224xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015225xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015226{
15227 xmlParserCtxtPtr ctxt;
15228
15229 if (cur == NULL)
15230 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015231 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015232
15233 ctxt = xmlCreateDocParserCtxt(cur);
15234 if (ctxt == NULL)
15235 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015236 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015237}
15238
15239/**
15240 * xmlReadFile:
15241 * @filename: a file or URL
15242 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015243 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015244 *
15245 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015246 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015247 * Returns the resulting document tree
15248 */
15249xmlDocPtr
15250xmlReadFile(const char *filename, const char *encoding, int options)
15251{
15252 xmlParserCtxtPtr ctxt;
15253
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015254 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015255 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015256 if (ctxt == NULL)
15257 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015258 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015259}
15260
15261/**
15262 * xmlReadMemory:
15263 * @buffer: a pointer to a char array
15264 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015265 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015266 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015267 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015268 *
15269 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015270 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015271 * Returns the resulting document tree
15272 */
15273xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015274xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015275{
15276 xmlParserCtxtPtr ctxt;
15277
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015278 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015279 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15280 if (ctxt == NULL)
15281 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015282 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015283}
15284
15285/**
15286 * xmlReadFd:
15287 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015288 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015289 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015290 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015291 *
15292 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015293 * NOTE that the file descriptor will not be closed when the
15294 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015295 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015296 * Returns the resulting document tree
15297 */
15298xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015299xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015300{
15301 xmlParserCtxtPtr ctxt;
15302 xmlParserInputBufferPtr input;
15303 xmlParserInputPtr stream;
15304
15305 if (fd < 0)
15306 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015307 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015308
15309 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15310 if (input == NULL)
15311 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015312 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015313 ctxt = xmlNewParserCtxt();
15314 if (ctxt == NULL) {
15315 xmlFreeParserInputBuffer(input);
15316 return (NULL);
15317 }
15318 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15319 if (stream == NULL) {
15320 xmlFreeParserInputBuffer(input);
15321 xmlFreeParserCtxt(ctxt);
15322 return (NULL);
15323 }
15324 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015325 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015326}
15327
15328/**
15329 * xmlReadIO:
15330 * @ioread: an I/O read function
15331 * @ioclose: an I/O close function
15332 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015333 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015334 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015335 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015336 *
15337 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015338 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015339 * Returns the resulting document tree
15340 */
15341xmlDocPtr
15342xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015343 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015344{
15345 xmlParserCtxtPtr ctxt;
15346 xmlParserInputBufferPtr input;
15347 xmlParserInputPtr stream;
15348
15349 if (ioread == NULL)
15350 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015351 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015352
15353 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15354 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015355 if (input == NULL) {
15356 if (ioclose != NULL)
15357 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015358 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015359 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015360 ctxt = xmlNewParserCtxt();
15361 if (ctxt == NULL) {
15362 xmlFreeParserInputBuffer(input);
15363 return (NULL);
15364 }
15365 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15366 if (stream == NULL) {
15367 xmlFreeParserInputBuffer(input);
15368 xmlFreeParserCtxt(ctxt);
15369 return (NULL);
15370 }
15371 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015372 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015373}
15374
15375/**
15376 * xmlCtxtReadDoc:
15377 * @ctxt: an XML parser context
15378 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015379 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015380 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015381 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015382 *
15383 * parse an XML in-memory document and build a tree.
15384 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015385 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015386 * Returns the resulting document tree
15387 */
15388xmlDocPtr
15389xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015390 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015391{
15392 xmlParserInputPtr stream;
15393
15394 if (cur == NULL)
15395 return (NULL);
15396 if (ctxt == NULL)
15397 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015398 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015399
15400 xmlCtxtReset(ctxt);
15401
15402 stream = xmlNewStringInputStream(ctxt, cur);
15403 if (stream == NULL) {
15404 return (NULL);
15405 }
15406 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015407 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015408}
15409
15410/**
15411 * xmlCtxtReadFile:
15412 * @ctxt: an XML parser context
15413 * @filename: a file or URL
15414 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015415 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015416 *
15417 * parse an XML file from the filesystem or the network.
15418 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015419 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015420 * Returns the resulting document tree
15421 */
15422xmlDocPtr
15423xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15424 const char *encoding, int options)
15425{
15426 xmlParserInputPtr stream;
15427
15428 if (filename == NULL)
15429 return (NULL);
15430 if (ctxt == NULL)
15431 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015432 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015433
15434 xmlCtxtReset(ctxt);
15435
Daniel Veillard29614c72004-11-26 10:47:26 +000015436 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015437 if (stream == NULL) {
15438 return (NULL);
15439 }
15440 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015441 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015442}
15443
15444/**
15445 * xmlCtxtReadMemory:
15446 * @ctxt: an XML parser context
15447 * @buffer: a pointer to a char array
15448 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015449 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015450 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015451 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015452 *
15453 * parse an XML in-memory document and build a tree.
15454 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015455 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015456 * Returns the resulting document tree
15457 */
15458xmlDocPtr
15459xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015460 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015461{
15462 xmlParserInputBufferPtr input;
15463 xmlParserInputPtr stream;
15464
15465 if (ctxt == NULL)
15466 return (NULL);
15467 if (buffer == NULL)
15468 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015469 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015470
15471 xmlCtxtReset(ctxt);
15472
15473 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15474 if (input == NULL) {
15475 return(NULL);
15476 }
15477
15478 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15479 if (stream == NULL) {
15480 xmlFreeParserInputBuffer(input);
15481 return(NULL);
15482 }
15483
15484 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015485 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015486}
15487
15488/**
15489 * xmlCtxtReadFd:
15490 * @ctxt: an XML parser context
15491 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015492 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015493 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015494 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015495 *
15496 * parse an XML from a file descriptor and build a tree.
15497 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015498 * NOTE that the file descriptor will not be closed when the
15499 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015500 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015501 * Returns the resulting document tree
15502 */
15503xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015504xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15505 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015506{
15507 xmlParserInputBufferPtr input;
15508 xmlParserInputPtr stream;
15509
15510 if (fd < 0)
15511 return (NULL);
15512 if (ctxt == NULL)
15513 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015514 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015515
15516 xmlCtxtReset(ctxt);
15517
15518
15519 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15520 if (input == NULL)
15521 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015522 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015523 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15524 if (stream == NULL) {
15525 xmlFreeParserInputBuffer(input);
15526 return (NULL);
15527 }
15528 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015529 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015530}
15531
15532/**
15533 * xmlCtxtReadIO:
15534 * @ctxt: an XML parser context
15535 * @ioread: an I/O read function
15536 * @ioclose: an I/O close function
15537 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015538 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015539 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015540 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015541 *
15542 * parse an XML document from I/O functions and source and build a tree.
15543 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015544 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015545 * Returns the resulting document tree
15546 */
15547xmlDocPtr
15548xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15549 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015550 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015551 const char *encoding, int options)
15552{
15553 xmlParserInputBufferPtr input;
15554 xmlParserInputPtr stream;
15555
15556 if (ioread == NULL)
15557 return (NULL);
15558 if (ctxt == NULL)
15559 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015560 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015561
15562 xmlCtxtReset(ctxt);
15563
15564 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15565 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015566 if (input == NULL) {
15567 if (ioclose != NULL)
15568 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015569 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015570 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015571 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15572 if (stream == NULL) {
15573 xmlFreeParserInputBuffer(input);
15574 return (NULL);
15575 }
15576 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015577 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015578}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015579
15580#define bottom_parser
15581#include "elfgcchack.h"