blob: 9a7135fd1f92254f0893f888def672071001b5e4 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
Daniel Veillardf8e3db02012-09-11 13:26:36 +080020 * different ranges of character are actually implemented either in
Owen Taylor3473f882001-02-23 17:55:21 +000021 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Stéphane Michaut454e3972017-08-28 14:30:43 +020033/* To avoid EBCDIC trouble when parsing on zOS */
34#if defined(__MVS__)
35#pragma convert("ISO8859-1")
36#endif
37
Daniel Veillard34ce8be2002-03-18 19:37:11 +000038#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000039#include "libxml.h"
40
Daniel Veillard3c5ed912002-01-08 10:36:16 +000041#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000042#define XML_DIR_SEP '\\'
43#else
Owen Taylor3473f882001-02-23 17:55:21 +000044#define XML_DIR_SEP '/'
45#endif
46
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080048#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000050#include <stdarg.h>
Nick Wellnhofer855c19e2017-06-01 01:04:08 +020051#include <stddef.h>
Owen Taylor3473f882001-02-23 17:55:21 +000052#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000053#include <libxml/threads.h>
54#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055#include <libxml/tree.h>
56#include <libxml/parser.h>
57#include <libxml/parserInternals.h>
58#include <libxml/valid.h>
59#include <libxml/entities.h>
60#include <libxml/xmlerror.h>
61#include <libxml/encoding.h>
62#include <libxml/xmlIO.h>
63#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000064#ifdef LIBXML_CATALOG_ENABLED
65#include <libxml/catalog.h>
66#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000067#ifdef LIBXML_SCHEMAS_ENABLED
68#include <libxml/xmlschemastypes.h>
69#include <libxml/relaxng.h>
70#endif
Owen Taylor3473f882001-02-23 17:55:21 +000071#ifdef HAVE_CTYPE_H
72#include <ctype.h>
73#endif
74#ifdef HAVE_STDLIB_H
75#include <stdlib.h>
76#endif
77#ifdef HAVE_SYS_STAT_H
78#include <sys/stat.h>
79#endif
80#ifdef HAVE_FCNTL_H
81#include <fcntl.h>
82#endif
83#ifdef HAVE_UNISTD_H
84#include <unistd.h>
85#endif
86#ifdef HAVE_ZLIB_H
87#include <zlib.h>
88#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020089#ifdef HAVE_LZMA_H
90#include <lzma.h>
91#endif
Owen Taylor3473f882001-02-23 17:55:21 +000092
Daniel Veillard768eb3b2012-07-16 14:19:49 +080093#include "buf.h"
94#include "enc.h"
95
/*
 * Forward declaration: xmlFatalErr() is defined further down in this file
 * but is already called by xmlParserEntityCheck() above its definition.
 */
Daniel Veillard0161e632008-08-28 15:36:32 +000096static void
97xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
98
/*
 * Forward declaration of a file-local helper; its definition is not part
 * of this excerpt.
 */
Rob Richards9c0aa472009-03-26 18:10:19 +000099static xmlParserCtxtPtr
100xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
101 const xmlChar *base, xmlParserCtxtPtr pctx);
102
/*
 * Forward declaration of a file-local helper; its definition is not part
 * of this excerpt.
 */
Daniel Veillard28cd9cb2015-11-20 14:55:30 +0800103static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
Daniel Veillard0161e632008-08-28 15:36:32 +0000105/************************************************************************
106 * *
107 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
108 * *
109 ************************************************************************/
110
/*
 * XML_PARSER_BIG_ENTITY: in xmlParserEntityCheck(), an entity whose size is
 * below this value is never flagged by the size-based check.
 */
111#define XML_PARSER_BIG_ENTITY 1000
/*
 * XML_PARSER_LOT_ENTITY: not referenced in the part of the file shown here;
 * NOTE(review): presumably a threshold on the number of entities - confirm.
 */
112#define XML_PARSER_LOT_ENTITY 5000
113
114/*
115 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
116 * replacement over the size in bytes of the input indicates that you have
117 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
118 * replacements per byte of input.
119 */
120#define XML_PARSER_NON_LINEAR 10
121
122/*
123 * xmlParserEntityCheck
124 *
125 * Function to check non-linear entity expansion behaviour
126 * This is here to detect and stop exponential linear entity expansion
127 * This is not a limitation of the parser but a safety
128 * boundary feature. It can be disabled with the XML_PARSE_HUGE
129 * parser option.
130 */
131static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800132xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800133 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000134{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800135 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000136
137 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
138 return (0);
139 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
140 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800141
142 /*
143 * This may look absurd but is needed to detect
144 * entities problems
145 */
146 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillardbdd66182016-05-23 12:27:58 +0800147 (ent->content != NULL) && (ent->checked == 0) &&
148 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800149 unsigned long oldnbent = ctxt->nbentities;
150 xmlChar *rep;
151
152 ent->checked = 1;
153
Peter Simons8f30bdf2016-04-15 11:56:55 +0200154 ++ctxt->depth;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800155 rep = xmlStringDecodeEntities(ctxt, ent->content,
156 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +0200157 --ctxt->depth;
Daniel Veillardbdd66182016-05-23 12:27:58 +0800158 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
159 ent->content[0] = 0;
160 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800161
162 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
163 if (rep != NULL) {
164 if (xmlStrchr(rep, '<'))
165 ent->checked |= 1;
166 xmlFree(rep);
167 rep = NULL;
168 }
169 }
Daniel Veillard23f05e02013-02-19 10:21:49 +0800170 if (replacement != 0) {
171 if (replacement < XML_MAX_TEXT_LENGTH)
172 return(0);
173
174 /*
175 * If the volume of entity copy reaches 10 times the
176 * amount of parsed data and over the large text threshold
177 * then that's very likely to be an abuse.
178 */
179 if (ctxt->input != NULL) {
180 consumed = ctxt->input->consumed +
181 (ctxt->input->cur - ctxt->input->base);
182 }
183 consumed += ctxt->sizeentities;
184
185 if (replacement < XML_PARSER_NON_LINEAR * consumed)
186 return(0);
187 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000188 /*
189 * Do the check based on the replacement size of the entity
190 */
191 if (size < XML_PARSER_BIG_ENTITY)
192 return(0);
193
194 /*
195 * A limit on the amount of text data reasonably used
196 */
197 if (ctxt->input != NULL) {
198 consumed = ctxt->input->consumed +
199 (ctxt->input->cur - ctxt->input->base);
200 }
201 consumed += ctxt->sizeentities;
202
203 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
204 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
205 return (0);
206 } else if (ent != NULL) {
207 /*
208 * use the number of parsed entities in the replacement
209 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800210 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000211
212 /*
213 * The amount of data parsed counting entities size only once
214 */
215 if (ctxt->input != NULL) {
216 consumed = ctxt->input->consumed +
217 (ctxt->input->cur - ctxt->input->base);
218 }
219 consumed += ctxt->sizeentities;
220
221 /*
222 * Check the density of entities for the amount of data
223 * knowing an entity reference will take at least 3 bytes
224 */
225 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
226 return (0);
227 } else {
228 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800229 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000230 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800231 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
232 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
233 (ctxt->nbentities <= 10000))
234 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000235 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000236 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
237 return (1);
238}
239
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000240/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000241 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000242 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000243 * Arbitrary depth limit for the XML documents that we allow the parser
244 * to process. This is not a limitation of the parser but a safety
245 * boundary feature. It can be disabled with the XML_PARSE_HUGE
246 * parser option. The initial value is 256.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000247 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000248unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000249
Daniel Veillard0fb18932003-09-07 09:14:37 +0000250
Daniel Veillard0161e632008-08-28 15:36:32 +0000251
/*
 * NOTE(review): the users of SAX2, XML_PARSER_BIG_BUFFER_SIZE,
 * XML_PARSER_BUFFER_SIZE and SAX_COMPAT_MODE are outside this excerpt;
 * the two *_BUFFER_SIZE values are presumably initial sizes of scratch
 * buffers - confirm against their call sites.
 */
252#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000253#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000254#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000255#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
256
Daniel Veillard1f972e92012-08-15 10:16:37 +0800257/**
258 * XML_PARSER_CHUNK_SIZE
259 *
260 * When calling GROW, this is the minimal amount of data
261 * the parser expects to have received. It is not a hard
262 * limit but an optimization when reading strings like Names.
263 * It is not strictly needed as long as the input's available characters
264 * are followed by a 0, which should be provided by the I/O level.
265 */
266#define XML_PARSER_CHUNK_SIZE 100
267
Owen Taylor3473f882001-02-23 17:55:21 +0000268/*
Owen Taylor3473f882001-02-23 17:55:21 +0000269 * List of XML-prefixed PI targets allowed by W3C specs.
 * The array is terminated by a NULL entry.
270 */
271
Daniel Veillardb44025c2001-10-11 22:55:55 +0000272static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000273 "xml-stylesheet",
Daniel Veillard4c4653e2011-06-05 11:29:29 +0800274 "xml-model",
Owen Taylor3473f882001-02-23 17:55:21 +0000275 NULL
276};
277
Daniel Veillarda07050d2003-10-19 14:46:32 +0000278
Owen Taylor3473f882001-02-23 17:55:21 +0000279/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
/*
 * Forward declarations of file-local (static) parsing helpers.
 * Their definitions are not part of this excerpt.
 */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200280static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
281 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000282
Daniel Veillard7d515752003-09-26 19:12:37 +0000283static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000284xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
285 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000286 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000287 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000288
Daniel Veillard37334572008-07-31 08:20:02 +0000289static int
290xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
291 const char *encoding);
/* Only declared when legacy support is compiled in. */
Daniel Veillard81273902003-09-30 00:43:48 +0000292#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000293static void
294xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
295 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000296#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000297
Daniel Veillard7d515752003-09-26 19:12:37 +0000298static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000299xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
300 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000301
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000302static int
303xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
304
Daniel Veillarde57ec792003-09-10 10:50:59 +0000305/************************************************************************
306 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800307 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000308 * *
309 ************************************************************************/
310
311/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 * xmlErrAttributeDup:
313 * @ctxt: an XML parser context
314 * @prefix: the attribute prefix
315 * @localname: the attribute localname
316 *
317 * Handle a redefinition of attribute error
318 */
319static void
320xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
321 const xmlChar * localname)
322{
Daniel Veillard157fee02003-10-31 10:36:03 +0000323 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
324 (ctxt->instate == XML_PARSER_EOF))
325 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000326 if (ctxt != NULL)
327 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200328
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000329 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000330 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200331 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000332 (const char *) localname, NULL, NULL, 0, 0,
333 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000334 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000335 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200336 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 (const char *) prefix, (const char *) localname,
338 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
339 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000340 if (ctxt != NULL) {
341 ctxt->wellFormed = 0;
342 if (ctxt->recovery == 0)
343 ctxt->disableSAX = 1;
344 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345}
346
347/**
348 * xmlFatalErr:
349 * @ctxt: an XML parser context
350 * @error: the error number
351 * @extra: extra information string
352 *
353 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
354 */
355static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357{
358 const char *errmsg;
359
Daniel Veillard157fee02003-10-31 10:36:03 +0000360 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
361 (ctxt->instate == XML_PARSER_EOF))
362 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 switch (error) {
364 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800365 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800368 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000374 errmsg = "internal error";
375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800377 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800404 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800428 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800431 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800434 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800437 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800440 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 errmsg = "Fragment not allowed";
444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800446 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800449 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000451 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800452 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800455 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000457 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800458 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000460 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800461 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000462 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000463 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800464 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000466 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800468 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000470 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800471 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000473 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800474 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000475 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000476 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800477 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000478 break;
479 case XML_ERR_CONDSEC_INVALID_KEYWORD:
480 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000492 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000495 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800499 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000500 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000501 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800502 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000504 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800505 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000506 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000507 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800508 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000509 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000510 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800511 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000512 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000513 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800514 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000515 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000516 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800517 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000518 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000519 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800520 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000521 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000522 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800523 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000525 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800526 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000527 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000528 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800529 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000530 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000531 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800532 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000533 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800534 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800535 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800536 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000537#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000538 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800539 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000540 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000541#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000542 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800543 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000544 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000545 if (ctxt != NULL)
546 ctxt->errNo = error;
David Kilzer4472c3a2016-05-13 15:13:17 +0800547 if (info == NULL) {
548 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
549 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
550 errmsg);
551 } else {
552 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
553 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
554 errmsg, info);
555 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000556 if (ctxt != NULL) {
557 ctxt->wellFormed = 0;
558 if (ctxt->recovery == 0)
559 ctxt->disableSAX = 1;
560 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000561}
562
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000563/**
564 * xmlFatalErrMsg:
565 * @ctxt: an XML parser context
566 * @error: the error number
567 * @msg: the error message
568 *
569 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
570 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800571static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000572xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
573 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574{
Daniel Veillard157fee02003-10-31 10:36:03 +0000575 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
576 (ctxt->instate == XML_PARSER_EOF))
577 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000578 if (ctxt != NULL)
579 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000580 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200581 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000582 if (ctxt != NULL) {
583 ctxt->wellFormed = 0;
584 if (ctxt->recovery == 0)
585 ctxt->disableSAX = 1;
586 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000587}
588
589/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000590 * xmlWarningMsg:
591 * @ctxt: an XML parser context
592 * @error: the error number
593 * @msg: the error message
594 * @str1: extra data
595 * @str2: extra data
596 *
597 * Handle a warning.
598 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800599static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000600xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601 const char *msg, const xmlChar *str1, const xmlChar *str2)
602{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000603 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000604
Daniel Veillard157fee02003-10-31 10:36:03 +0000605 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
606 (ctxt->instate == XML_PARSER_EOF))
607 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000608 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
609 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000610 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200611 if (ctxt != NULL) {
612 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000613 (ctxt->sax) ? ctxt->sax->warning : NULL,
614 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200619 } else {
620 __xmlRaiseError(schannel, NULL, NULL,
621 ctxt, NULL, XML_FROM_PARSER, error,
622 XML_ERR_WARNING, NULL, 0,
623 (const char *) str1, (const char *) str2, NULL, 0, 0,
624 msg, (const char *) str1, (const char *) str2);
625 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000626}
627
628/**
629 * xmlValidityError:
630 * @ctxt: an XML parser context
631 * @error: the error number
632 * @msg: the error message
633 * @str1: extra data
634 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000635 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000636 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800637static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000638xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000639 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000640{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000641 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000642
643 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
644 (ctxt->instate == XML_PARSER_EOF))
645 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000646 if (ctxt != NULL) {
647 ctxt->errNo = error;
648 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
649 schannel = ctxt->sax->serror;
650 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200651 if (ctxt != NULL) {
652 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000653 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000654 ctxt, NULL, XML_FROM_DTD, error,
655 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000656 (const char *) str2, NULL, 0, 0,
657 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000658 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200659 } else {
660 __xmlRaiseError(schannel, NULL, NULL,
661 ctxt, NULL, XML_FROM_DTD, error,
662 XML_ERR_ERROR, NULL, 0, (const char *) str1,
663 (const char *) str2, NULL, 0, 0,
664 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000665 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000666}
667
668/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000669 * xmlFatalErrMsgInt:
670 * @ctxt: an XML parser context
671 * @error: the error number
672 * @msg: the error message
673 * @val: an integer value
674 *
675 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
676 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800677static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000678xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000679 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000680{
Daniel Veillard157fee02003-10-31 10:36:03 +0000681 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
682 (ctxt->instate == XML_PARSER_EOF))
683 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000684 if (ctxt != NULL)
685 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000686 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000687 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
688 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000689 if (ctxt != NULL) {
690 ctxt->wellFormed = 0;
691 if (ctxt->recovery == 0)
692 ctxt->disableSAX = 1;
693 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000694}
695
696/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000697 * xmlFatalErrMsgStrIntStr:
698 * @ctxt: an XML parser context
699 * @error: the error number
700 * @msg: the error message
701 * @str1: an string info
702 * @val: an integer value
703 * @str2: an string info
704 *
705 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
706 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800707static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000708xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800709 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000710 const xmlChar *str2)
711{
Daniel Veillard157fee02003-10-31 10:36:03 +0000712 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
713 (ctxt->instate == XML_PARSER_EOF))
714 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000715 if (ctxt != NULL)
716 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000717 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000718 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
719 NULL, 0, (const char *) str1, (const char *) str2,
720 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000721 if (ctxt != NULL) {
722 ctxt->wellFormed = 0;
723 if (ctxt->recovery == 0)
724 ctxt->disableSAX = 1;
725 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000726}
727
728/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000729 * xmlFatalErrMsgStr:
730 * @ctxt: an XML parser context
731 * @error: the error number
732 * @msg: the error message
733 * @val: a string value
734 *
735 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
736 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800737static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000738xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000739 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000740{
Daniel Veillard157fee02003-10-31 10:36:03 +0000741 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
742 (ctxt->instate == XML_PARSER_EOF))
743 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000744 if (ctxt != NULL)
745 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000746 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000747 XML_FROM_PARSER, error, XML_ERR_FATAL,
748 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
749 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000750 if (ctxt != NULL) {
751 ctxt->wellFormed = 0;
752 if (ctxt->recovery == 0)
753 ctxt->disableSAX = 1;
754 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000755}
756
757/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000758 * xmlErrMsgStr:
759 * @ctxt: an XML parser context
760 * @error: the error number
761 * @msg: the error message
762 * @val: a string value
763 *
764 * Handle a non fatal parser error
765 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800766static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000767xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
768 const char *msg, const xmlChar * val)
769{
Daniel Veillard157fee02003-10-31 10:36:03 +0000770 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
771 (ctxt->instate == XML_PARSER_EOF))
772 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000773 if (ctxt != NULL)
774 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000775 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000776 XML_FROM_PARSER, error, XML_ERR_ERROR,
777 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
778 val);
779}
780
781/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000782 * xmlNsErr:
783 * @ctxt: an XML parser context
784 * @error: the error number
785 * @msg: the message
786 * @info1: extra information string
787 * @info2: extra information string
788 *
789 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
790 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800791static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000792xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
793 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000794 const xmlChar * info1, const xmlChar * info2,
795 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000796{
Daniel Veillard157fee02003-10-31 10:36:03 +0000797 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
798 (ctxt->instate == XML_PARSER_EOF))
799 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000800 if (ctxt != NULL)
801 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000802 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000803 XML_ERR_ERROR, NULL, 0, (const char *) info1,
804 (const char *) info2, (const char *) info3, 0, 0, msg,
805 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000806 if (ctxt != NULL)
807 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000808}
809
Daniel Veillard37334572008-07-31 08:20:02 +0000810/**
811 * xmlNsWarn
812 * @ctxt: an XML parser context
813 * @error: the error number
814 * @msg: the message
815 * @info1: extra information string
816 * @info2: extra information string
817 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800818 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000819 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800820static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard37334572008-07-31 08:20:02 +0000821xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
822 const char *msg,
823 const xmlChar * info1, const xmlChar * info2,
824 const xmlChar * info3)
825{
826 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
827 (ctxt->instate == XML_PARSER_EOF))
828 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000829 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
830 XML_ERR_WARNING, NULL, 0, (const char *) info1,
831 (const char *) info2, (const char *) info3, 0, 0, msg,
832 info1, info2, info3);
833}
834
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000835/************************************************************************
836 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800837 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000838 * *
839 ************************************************************************/
840
841/**
842 * xmlHasFeature:
843 * @feature: the feature to be examined
844 *
845 * Examines if the library has been compiled with a given feature.
846 *
847 * Returns a non-zero value if the feature exist, otherwise zero.
848 * Returns zero (0) if the feature does not exist or an unknown
849 * unknown feature is requested, non-zero otherwise.
850 */
851int
852xmlHasFeature(xmlFeature feature)
853{
854 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000855 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000856#ifdef LIBXML_THREAD_ENABLED
857 return(1);
858#else
859 return(0);
860#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000861 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000862#ifdef LIBXML_TREE_ENABLED
863 return(1);
864#else
865 return(0);
866#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000867 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000868#ifdef LIBXML_OUTPUT_ENABLED
869 return(1);
870#else
871 return(0);
872#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000873 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000874#ifdef LIBXML_PUSH_ENABLED
875 return(1);
876#else
877 return(0);
878#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000879 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000880#ifdef LIBXML_READER_ENABLED
881 return(1);
882#else
883 return(0);
884#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000885 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000886#ifdef LIBXML_PATTERN_ENABLED
887 return(1);
888#else
889 return(0);
890#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000891 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000892#ifdef LIBXML_WRITER_ENABLED
893 return(1);
894#else
895 return(0);
896#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000897 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000898#ifdef LIBXML_SAX1_ENABLED
899 return(1);
900#else
901 return(0);
902#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000903 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000904#ifdef LIBXML_FTP_ENABLED
905 return(1);
906#else
907 return(0);
908#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000909 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000910#ifdef LIBXML_HTTP_ENABLED
911 return(1);
912#else
913 return(0);
914#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000915 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000916#ifdef LIBXML_VALID_ENABLED
917 return(1);
918#else
919 return(0);
920#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000921 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000922#ifdef LIBXML_HTML_ENABLED
923 return(1);
924#else
925 return(0);
926#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000927 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000928#ifdef LIBXML_LEGACY_ENABLED
929 return(1);
930#else
931 return(0);
932#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000933 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000934#ifdef LIBXML_C14N_ENABLED
935 return(1);
936#else
937 return(0);
938#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000939 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000940#ifdef LIBXML_CATALOG_ENABLED
941 return(1);
942#else
943 return(0);
944#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000945 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000946#ifdef LIBXML_XPATH_ENABLED
947 return(1);
948#else
949 return(0);
950#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000951 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000952#ifdef LIBXML_XPTR_ENABLED
953 return(1);
954#else
955 return(0);
956#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000957 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000958#ifdef LIBXML_XINCLUDE_ENABLED
959 return(1);
960#else
961 return(0);
962#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000963 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000964#ifdef LIBXML_ICONV_ENABLED
965 return(1);
966#else
967 return(0);
968#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000969 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000970#ifdef LIBXML_ISO8859X_ENABLED
971 return(1);
972#else
973 return(0);
974#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000975 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000976#ifdef LIBXML_UNICODE_ENABLED
977 return(1);
978#else
979 return(0);
980#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000981 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000982#ifdef LIBXML_REGEXP_ENABLED
983 return(1);
984#else
985 return(0);
986#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000987 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000988#ifdef LIBXML_AUTOMATA_ENABLED
989 return(1);
990#else
991 return(0);
992#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000993 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000994#ifdef LIBXML_EXPR_ENABLED
995 return(1);
996#else
997 return(0);
998#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000999 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001000#ifdef LIBXML_SCHEMAS_ENABLED
1001 return(1);
1002#else
1003 return(0);
1004#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001005 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001006#ifdef LIBXML_SCHEMATRON_ENABLED
1007 return(1);
1008#else
1009 return(0);
1010#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001011 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001012#ifdef LIBXML_MODULES_ENABLED
1013 return(1);
1014#else
1015 return(0);
1016#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001017 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001018#ifdef LIBXML_DEBUG_ENABLED
1019 return(1);
1020#else
1021 return(0);
1022#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001023 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001024#ifdef DEBUG_MEMORY_LOCATION
1025 return(1);
1026#else
1027 return(0);
1028#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001029 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001030#ifdef LIBXML_DEBUG_RUNTIME
1031 return(1);
1032#else
1033 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001034#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001035 case XML_WITH_ZLIB:
1036#ifdef LIBXML_ZLIB_ENABLED
1037 return(1);
1038#else
1039 return(0);
1040#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001041 case XML_WITH_LZMA:
1042#ifdef LIBXML_LZMA_ENABLED
1043 return(1);
1044#else
1045 return(0);
1046#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001047 case XML_WITH_ICU:
1048#ifdef LIBXML_ICU_ENABLED
1049 return(1);
1050#else
1051 return(0);
1052#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001053 default:
1054 break;
1055 }
1056 return(0);
1057}
1058
1059/************************************************************************
1060 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001061 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001062 * *
1063 ************************************************************************/
1064
1065/**
1066 * xmlDetectSAX2:
1067 * @ctxt: an XML parser context
1068 *
1069 * Do the SAX2 detection and specific intialization
1070 */
1071static void
1072xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1073 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001074#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001075 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1076 ((ctxt->sax->startElementNs != NULL) ||
1077 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001078#else
1079 ctxt->sax2 = 1;
1080#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001081
1082 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1083 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1084 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001085 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1086 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001087 xmlErrMemory(ctxt, NULL);
1088 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001089}
1090
Daniel Veillarde57ec792003-09-10 10:50:59 +00001091typedef struct _xmlDefAttrs xmlDefAttrs;
1092typedef xmlDefAttrs *xmlDefAttrsPtr;
1093struct _xmlDefAttrs {
1094 int nbAttrs; /* number of defaulted attributes on that element */
1095 int maxAttrs; /* the size of the array */
Nick Wellnhofer47496722017-05-31 16:46:39 +02001096#if __STDC_VERSION__ >= 199901L
1097 /* Using a C99 flexible array member avoids UBSan errors. */
1098 const xmlChar *values[]; /* array of localname/prefix/values/external */
1099#else
1100 const xmlChar *values[5];
1101#endif
Daniel Veillarde57ec792003-09-10 10:50:59 +00001102};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001103
1104/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001105 * xmlAttrNormalizeSpace:
1106 * @src: the source string
1107 * @dst: the target string
1108 *
1109 * Normalize the space in non CDATA attribute values:
1110 * If the attribute type is not CDATA, then the XML processor MUST further
1111 * process the normalized attribute value by discarding any leading and
1112 * trailing space (#x20) characters, and by replacing sequences of space
1113 * (#x20) characters by a single space (#x20) character.
1114 * Note that the size of dst need to be at least src, and if one doesn't need
1115 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1116 * passing src as dst is just fine.
1117 *
1118 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1119 * is needed.
1120 */
1121static xmlChar *
1122xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1123{
1124 if ((src == NULL) || (dst == NULL))
1125 return(NULL);
1126
1127 while (*src == 0x20) src++;
1128 while (*src != 0) {
1129 if (*src == 0x20) {
1130 while (*src == 0x20) src++;
1131 if (*src != 0)
1132 *dst++ = 0x20;
1133 } else {
1134 *dst++ = *src++;
1135 }
1136 }
1137 *dst = 0;
1138 if (dst == src)
1139 return(NULL);
1140 return(dst);
1141}
1142
1143/**
1144 * xmlAttrNormalizeSpace2:
1145 * @src: the source string
1146 *
1147 * Normalize the space in non CDATA attribute values, a slightly more complex
1148 * front end to avoid allocation problems when running on attribute values
1149 * coming from the input.
1150 *
1151 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1152 * is needed.
1153 */
1154static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001155xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001156{
1157 int i;
1158 int remove_head = 0;
1159 int need_realloc = 0;
1160 const xmlChar *cur;
1161
1162 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1163 return(NULL);
1164 i = *len;
1165 if (i <= 0)
1166 return(NULL);
1167
1168 cur = src;
1169 while (*cur == 0x20) {
1170 cur++;
1171 remove_head++;
1172 }
1173 while (*cur != 0) {
1174 if (*cur == 0x20) {
1175 cur++;
1176 if ((*cur == 0x20) || (*cur == 0)) {
1177 need_realloc = 1;
1178 break;
1179 }
1180 } else
1181 cur++;
1182 }
1183 if (need_realloc) {
1184 xmlChar *ret;
1185
1186 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1187 if (ret == NULL) {
1188 xmlErrMemory(ctxt, NULL);
1189 return(NULL);
1190 }
1191 xmlAttrNormalizeSpace(ret, ret);
1192 *len = (int) strlen((const char *)ret);
1193 return(ret);
1194 } else if (remove_head) {
1195 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001196 memmove(src, src + remove_head, 1 + *len);
1197 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001198 }
1199 return(NULL);
1200}
1201
1202/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001203 * xmlAddDefAttrs:
1204 * @ctxt: an XML parser context
1205 * @fullname: the element fullname
1206 * @fullattr: the attribute fullname
1207 * @value: the attribute value
1208 *
1209 * Add a defaulted attribute for an element
1210 */
1211static void
1212xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1213 const xmlChar *fullname,
1214 const xmlChar *fullattr,
1215 const xmlChar *value) {
1216 xmlDefAttrsPtr defaults;
1217 int len;
1218 const xmlChar *name;
1219 const xmlChar *prefix;
1220
Daniel Veillard6a31b832008-03-26 14:06:44 +00001221 /*
1222 * Allows to detect attribute redefinitions
1223 */
1224 if (ctxt->attsSpecial != NULL) {
1225 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1226 return;
1227 }
1228
Daniel Veillarde57ec792003-09-10 10:50:59 +00001229 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001230 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001231 if (ctxt->attsDefault == NULL)
1232 goto mem_error;
1233 }
1234
1235 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001236 * split the element name into prefix:localname , the string found
1237 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001238 */
1239 name = xmlSplitQName3(fullname, &len);
1240 if (name == NULL) {
1241 name = xmlDictLookup(ctxt->dict, fullname, -1);
1242 prefix = NULL;
1243 } else {
1244 name = xmlDictLookup(ctxt->dict, name, -1);
1245 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1246 }
1247
1248 /*
1249 * make sure there is some storage
1250 */
1251 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1252 if (defaults == NULL) {
1253 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001254 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001255 if (defaults == NULL)
1256 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001257 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001258 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001259 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1260 defaults, NULL) < 0) {
1261 xmlFree(defaults);
1262 goto mem_error;
1263 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001264 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001265 xmlDefAttrsPtr temp;
1266
1267 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001268 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001269 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001270 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001271 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001272 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001273 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1274 defaults, NULL) < 0) {
1275 xmlFree(defaults);
1276 goto mem_error;
1277 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001278 }
1279
1280 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001281 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001282 * are within the DTD and hen not associated to namespace names.
1283 */
1284 name = xmlSplitQName3(fullattr, &len);
1285 if (name == NULL) {
1286 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1287 prefix = NULL;
1288 } else {
1289 name = xmlDictLookup(ctxt->dict, name, -1);
1290 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1291 }
1292
Daniel Veillardae0765b2008-07-31 19:54:59 +00001293 defaults->values[5 * defaults->nbAttrs] = name;
1294 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001295 /* intern the string and precompute the end */
1296 len = xmlStrlen(value);
1297 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001298 defaults->values[5 * defaults->nbAttrs + 2] = value;
1299 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1300 if (ctxt->external)
1301 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1302 else
1303 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001304 defaults->nbAttrs++;
1305
1306 return;
1307
1308mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001309 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001310 return;
1311}
1312
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001313/**
1314 * xmlAddSpecialAttr:
1315 * @ctxt: an XML parser context
1316 * @fullname: the element fullname
1317 * @fullattr: the attribute fullname
1318 * @type: the attribute type
1319 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001320 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001321 */
1322static void
1323xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1324 const xmlChar *fullname,
1325 const xmlChar *fullattr,
1326 int type)
1327{
1328 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001329 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001330 if (ctxt->attsSpecial == NULL)
1331 goto mem_error;
1332 }
1333
Daniel Veillardac4118d2008-01-11 05:27:32 +00001334 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1335 return;
1336
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001337 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1338 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001339 return;
1340
1341mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001342 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001343 return;
1344}
1345
Daniel Veillard4432df22003-09-28 18:58:27 +00001346/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001347 * xmlCleanSpecialAttrCallback:
1348 *
1349 * Removes CDATA attributes from the special attribute table
1350 */
1351static void
1352xmlCleanSpecialAttrCallback(void *payload, void *data,
1353 const xmlChar *fullname, const xmlChar *fullattr,
1354 const xmlChar *unused ATTRIBUTE_UNUSED) {
1355 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1356
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001357 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001358 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1359 }
1360}
1361
1362/**
1363 * xmlCleanSpecialAttr:
1364 * @ctxt: an XML parser context
1365 *
1366 * Trim the list of attributes defined to remove all those of type
1367 * CDATA as they are not special. This call should be done when finishing
1368 * to parse the DTD and before starting to parse the document root.
1369 */
1370static void
1371xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1372{
1373 if (ctxt->attsSpecial == NULL)
1374 return;
1375
1376 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1377
1378 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1379 xmlHashFree(ctxt->attsSpecial, NULL);
1380 ctxt->attsSpecial = NULL;
1381 }
1382 return;
1383}
1384
1385/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001386 * xmlCheckLanguageID:
1387 * @lang: pointer to the string value
1388 *
1389 * Checks that the value conforms to the LanguageID production:
1390 *
1391 * NOTE: this is somewhat deprecated, those productions were removed from
1392 * the XML Second edition.
1393 *
1394 * [33] LanguageID ::= Langcode ('-' Subcode)*
1395 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1396 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1397 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1398 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1399 * [38] Subcode ::= ([a-z] | [A-Z])+
1400 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001401 * The current REC reference the sucessors of RFC 1766, currently 5646
1402 *
1403 * http://www.rfc-editor.org/rfc/rfc5646.txt
1404 * langtag = language
1405 * ["-" script]
1406 * ["-" region]
1407 * *("-" variant)
1408 * *("-" extension)
1409 * ["-" privateuse]
1410 * language = 2*3ALPHA ; shortest ISO 639 code
1411 * ["-" extlang] ; sometimes followed by
1412 * ; extended language subtags
1413 * / 4ALPHA ; or reserved for future use
1414 * / 5*8ALPHA ; or registered language subtag
1415 *
1416 * extlang = 3ALPHA ; selected ISO 639 codes
1417 * *2("-" 3ALPHA) ; permanently reserved
1418 *
1419 * script = 4ALPHA ; ISO 15924 code
1420 *
1421 * region = 2ALPHA ; ISO 3166-1 code
1422 * / 3DIGIT ; UN M.49 code
1423 *
1424 * variant = 5*8alphanum ; registered variants
1425 * / (DIGIT 3alphanum)
1426 *
1427 * extension = singleton 1*("-" (2*8alphanum))
1428 *
1429 * ; Single alphanumerics
1430 * ; "x" reserved for private use
1431 * singleton = DIGIT ; 0 - 9
1432 * / %x41-57 ; A - W
1433 * / %x59-5A ; Y - Z
1434 * / %x61-77 ; a - w
1435 * / %x79-7A ; y - z
1436 *
1437 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1438 * The parser below doesn't try to cope with extension or privateuse
1439 * that could be added but that's not interoperable anyway
1440 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001441 * Returns 1 if correct 0 otherwise
1442 **/
1443int
1444xmlCheckLanguageID(const xmlChar * lang)
1445{
Daniel Veillard60587d62010-11-04 15:16:27 +01001446 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001447
1448 if (cur == NULL)
1449 return (0);
1450 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001451 ((cur[0] == 'I') && (cur[1] == '-')) ||
1452 ((cur[0] == 'x') && (cur[1] == '-')) ||
1453 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001454 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001455 * Still allow IANA code and user code which were coming
1456 * from the previous version of the XML-1.0 specification
1457 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001458 */
1459 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001460 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001461 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1462 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001463 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001464 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001465 nxt = cur;
1466 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1467 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1468 nxt++;
1469 if (nxt - cur >= 4) {
1470 /*
1471 * Reserved
1472 */
1473 if ((nxt - cur > 8) || (nxt[0] != 0))
1474 return(0);
1475 return(1);
1476 }
1477 if (nxt - cur < 2)
1478 return(0);
1479 /* we got an ISO 639 code */
1480 if (nxt[0] == 0)
1481 return(1);
1482 if (nxt[0] != '-')
1483 return(0);
1484
1485 nxt++;
1486 cur = nxt;
1487 /* now we can have extlang or script or region or variant */
1488 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1489 goto region_m49;
1490
1491 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1492 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1493 nxt++;
1494 if (nxt - cur == 4)
1495 goto script;
1496 if (nxt - cur == 2)
1497 goto region;
1498 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1499 goto variant;
1500 if (nxt - cur != 3)
1501 return(0);
1502 /* we parsed an extlang */
1503 if (nxt[0] == 0)
1504 return(1);
1505 if (nxt[0] != '-')
1506 return(0);
1507
1508 nxt++;
1509 cur = nxt;
1510 /* now we can have script or region or variant */
1511 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1512 goto region_m49;
1513
1514 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1515 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1516 nxt++;
1517 if (nxt - cur == 2)
1518 goto region;
1519 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1520 goto variant;
1521 if (nxt - cur != 4)
1522 return(0);
1523 /* we parsed a script */
1524script:
1525 if (nxt[0] == 0)
1526 return(1);
1527 if (nxt[0] != '-')
1528 return(0);
1529
1530 nxt++;
1531 cur = nxt;
1532 /* now we can have region or variant */
1533 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1534 goto region_m49;
1535
1536 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1537 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1538 nxt++;
1539
1540 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1541 goto variant;
1542 if (nxt - cur != 2)
1543 return(0);
1544 /* we parsed a region */
1545region:
1546 if (nxt[0] == 0)
1547 return(1);
1548 if (nxt[0] != '-')
1549 return(0);
1550
1551 nxt++;
1552 cur = nxt;
1553 /* now we can just have a variant */
1554 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1555 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1556 nxt++;
1557
1558 if ((nxt - cur < 5) || (nxt - cur > 8))
1559 return(0);
1560
1561 /* we parsed a variant */
1562variant:
1563 if (nxt[0] == 0)
1564 return(1);
1565 if (nxt[0] != '-')
1566 return(0);
1567 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001568 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001569
1570region_m49:
1571 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1572 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1573 nxt += 3;
1574 goto region;
1575 }
1576 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001577}
1578
Owen Taylor3473f882001-02-23 17:55:21 +00001579/************************************************************************
1580 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001581 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001582 * *
1583 ************************************************************************/
1584
Daniel Veillard8ed10722009-08-20 19:17:36 +02001585static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1586 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001587
Daniel Veillard0fb18932003-09-07 09:14:37 +00001588#ifdef SAX2
1589/**
1590 * nsPush:
1591 * @ctxt: an XML parser context
1592 * @prefix: the namespace prefix or NULL
1593 * @URL: the namespace name
1594 *
1595 * Pushes a new parser namespace on top of the ns stack
1596 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001597 * Returns -1 in case of error, -2 if the namespace should be discarded
1598 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001599 */
1600static int
1601nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1602{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001603 if (ctxt->options & XML_PARSE_NSCLEAN) {
1604 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001605 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001606 if (ctxt->nsTab[i] == prefix) {
1607 /* in scope */
1608 if (ctxt->nsTab[i + 1] == URL)
1609 return(-2);
1610 /* out of scope keep it */
1611 break;
1612 }
1613 }
1614 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001615 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1616 ctxt->nsMax = 10;
1617 ctxt->nsNr = 0;
1618 ctxt->nsTab = (const xmlChar **)
1619 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1620 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001621 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001622 ctxt->nsMax = 0;
1623 return (-1);
1624 }
1625 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001626 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001627 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001628 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1629 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1630 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001631 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001632 ctxt->nsMax /= 2;
1633 return (-1);
1634 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001635 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001636 }
1637 ctxt->nsTab[ctxt->nsNr++] = prefix;
1638 ctxt->nsTab[ctxt->nsNr++] = URL;
1639 return (ctxt->nsNr);
1640}
1641/**
1642 * nsPop:
1643 * @ctxt: an XML parser context
1644 * @nr: the number to pop
1645 *
1646 * Pops the top @nr parser prefix/namespace from the ns stack
1647 *
1648 * Returns the number of namespaces removed
1649 */
1650static int
1651nsPop(xmlParserCtxtPtr ctxt, int nr)
1652{
1653 int i;
1654
1655 if (ctxt->nsTab == NULL) return(0);
1656 if (ctxt->nsNr < nr) {
1657 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1658 nr = ctxt->nsNr;
1659 }
1660 if (ctxt->nsNr <= 0)
1661 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001662
Daniel Veillard0fb18932003-09-07 09:14:37 +00001663 for (i = 0;i < nr;i++) {
1664 ctxt->nsNr--;
1665 ctxt->nsTab[ctxt->nsNr] = NULL;
1666 }
1667 return(nr);
1668}
1669#endif
1670
1671static int
1672xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1673 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001674 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001675 int maxatts;
1676
1677 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001678 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001679 atts = (const xmlChar **)
1680 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001681 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001682 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001683 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1684 if (attallocs == NULL) goto mem_error;
1685 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001686 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001687 } else if (nr + 5 > ctxt->maxatts) {
1688 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001689 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1690 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001691 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001692 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001693 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1694 (maxatts / 5) * sizeof(int));
1695 if (attallocs == NULL) goto mem_error;
1696 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001697 ctxt->maxatts = maxatts;
1698 }
1699 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001700mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001701 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001702 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001703}
1704
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001705/**
1706 * inputPush:
1707 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001708 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001709 *
1710 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001711 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001712 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001713 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001714int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001715inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1716{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001717 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001718 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001719 if (ctxt->inputNr >= ctxt->inputMax) {
1720 ctxt->inputMax *= 2;
1721 ctxt->inputTab =
1722 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1723 ctxt->inputMax *
1724 sizeof(ctxt->inputTab[0]));
1725 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001726 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001727 xmlFreeInputStream(value);
1728 ctxt->inputMax /= 2;
1729 value = NULL;
1730 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001731 }
1732 }
1733 ctxt->inputTab[ctxt->inputNr] = value;
1734 ctxt->input = value;
1735 return (ctxt->inputNr++);
1736}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001737/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001738 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001739 * @ctxt: an XML parser context
1740 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001741 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001742 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001743 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001744 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001745xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001746inputPop(xmlParserCtxtPtr ctxt)
1747{
1748 xmlParserInputPtr ret;
1749
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001750 if (ctxt == NULL)
1751 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001752 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001753 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001754 ctxt->inputNr--;
1755 if (ctxt->inputNr > 0)
1756 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1757 else
1758 ctxt->input = NULL;
1759 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001760 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001761 return (ret);
1762}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001763/**
1764 * nodePush:
1765 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001766 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001767 *
1768 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001769 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001770 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001771 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001772int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001773nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1774{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001775 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001776 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001777 xmlNodePtr *tmp;
1778
1779 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1780 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001781 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001782 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001783 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001784 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001785 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001786 ctxt->nodeTab = tmp;
1787 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001788 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001789 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1790 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001791 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001792 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001793 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001794 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001795 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001796 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001797 ctxt->nodeTab[ctxt->nodeNr] = value;
1798 ctxt->node = value;
1799 return (ctxt->nodeNr++);
1800}
Daniel Veillard8915c152008-08-26 13:05:34 +00001801
Daniel Veillard1c732d22002-11-30 11:22:59 +00001802/**
1803 * nodePop:
1804 * @ctxt: an XML parser context
1805 *
1806 * Pops the top element node from the node stack
1807 *
1808 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001809 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001810xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001811nodePop(xmlParserCtxtPtr ctxt)
1812{
1813 xmlNodePtr ret;
1814
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001815 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001816 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001817 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001818 ctxt->nodeNr--;
1819 if (ctxt->nodeNr > 0)
1820 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1821 else
1822 ctxt->node = NULL;
1823 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001824 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001825 return (ret);
1826}
Daniel Veillarda2351322004-06-27 12:08:10 +00001827
1828#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001829/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001830 * nameNsPush:
1831 * @ctxt: an XML parser context
1832 * @value: the element name
1833 * @prefix: the element prefix
1834 * @URI: the element namespace name
1835 *
1836 * Pushes a new element name/prefix/URL on top of the name stack
1837 *
1838 * Returns -1 in case of error, the index in the stack otherwise
1839 */
1840static int
1841nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1842 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1843{
1844 if (ctxt->nameNr >= ctxt->nameMax) {
1845 const xmlChar * *tmp;
1846 void **tmp2;
1847 ctxt->nameMax *= 2;
1848 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1849 ctxt->nameMax *
1850 sizeof(ctxt->nameTab[0]));
1851 if (tmp == NULL) {
1852 ctxt->nameMax /= 2;
1853 goto mem_error;
1854 }
1855 ctxt->nameTab = tmp;
1856 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1857 ctxt->nameMax * 3 *
1858 sizeof(ctxt->pushTab[0]));
1859 if (tmp2 == NULL) {
1860 ctxt->nameMax /= 2;
1861 goto mem_error;
1862 }
1863 ctxt->pushTab = tmp2;
1864 }
1865 ctxt->nameTab[ctxt->nameNr] = value;
1866 ctxt->name = value;
1867 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1868 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001869 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001870 return (ctxt->nameNr++);
1871mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001872 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001873 return (-1);
1874}
1875/**
1876 * nameNsPop:
1877 * @ctxt: an XML parser context
1878 *
1879 * Pops the top element/prefix/URI name from the name stack
1880 *
1881 * Returns the name just removed
1882 */
1883static const xmlChar *
1884nameNsPop(xmlParserCtxtPtr ctxt)
1885{
1886 const xmlChar *ret;
1887
1888 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001889 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001890 ctxt->nameNr--;
1891 if (ctxt->nameNr > 0)
1892 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1893 else
1894 ctxt->name = NULL;
1895 ret = ctxt->nameTab[ctxt->nameNr];
1896 ctxt->nameTab[ctxt->nameNr] = NULL;
1897 return (ret);
1898}
Daniel Veillarda2351322004-06-27 12:08:10 +00001899#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001900
1901/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001902 * namePush:
1903 * @ctxt: an XML parser context
1904 * @value: the element name
1905 *
1906 * Pushes a new element name on top of the name stack
1907 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001908 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001909 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001910int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001911namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001912{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001913 if (ctxt == NULL) return (-1);
1914
Daniel Veillard1c732d22002-11-30 11:22:59 +00001915 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001916 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001917 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001918 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001919 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001920 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001921 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001922 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001923 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001924 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001925 }
1926 ctxt->nameTab[ctxt->nameNr] = value;
1927 ctxt->name = value;
1928 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001929mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001930 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001931 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001932}
1933/**
1934 * namePop:
1935 * @ctxt: an XML parser context
1936 *
1937 * Pops the top element name from the name stack
1938 *
1939 * Returns the name just removed
1940 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001941const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001942namePop(xmlParserCtxtPtr ctxt)
1943{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001944 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001945
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001946 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1947 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001948 ctxt->nameNr--;
1949 if (ctxt->nameNr > 0)
1950 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1951 else
1952 ctxt->name = NULL;
1953 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001954 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001955 return (ret);
1956}
Owen Taylor3473f882001-02-23 17:55:21 +00001957
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001958static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001959 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001960 int *tmp;
1961
Owen Taylor3473f882001-02-23 17:55:21 +00001962 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001963 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1964 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1965 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001966 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001967 ctxt->spaceMax /=2;
1968 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001969 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001970 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001971 }
1972 ctxt->spaceTab[ctxt->spaceNr] = val;
1973 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1974 return(ctxt->spaceNr++);
1975}
1976
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001977static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001978 int ret;
1979 if (ctxt->spaceNr <= 0) return(0);
1980 ctxt->spaceNr--;
1981 if (ctxt->spaceNr > 0)
1982 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1983 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001984 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001985 ret = ctxt->spaceTab[ctxt->spaceNr];
1986 ctxt->spaceTab[ctxt->spaceNr] = -1;
1987 return(ret);
1988}
1989
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2024
/*
 * Raw accessors on the current input. They all expand to expressions on
 * a variable named ctxt, which must be in scope at the point of use.
 */
#define RAW (ctxt->input->cur[0])		/* current xmlChar */
#define CUR (ctxt->input->cur[0])		/* same expansion as RAW */
#define NXT(val) ctxt->input->cur[(val)]	/* xmlChar at offset val */
#define CUR_PTR ctxt->input->cur		/* current position */
#define BASE_PTR ctxt->input->base		/* the input's base pointer */
Owen Taylor3473f882001-02-23 17:55:21 +00002030
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1 ... cn, compared as unsigned char. Evaluation stops at the first
 * mismatch, so no byte past a mismatching one is read.
 *
 * Every argument is parenthesized in the expansion so that expression
 * arguments (pointer arithmetic for s, conditional expressions for the
 * characters) bind as the caller wrote them. s is evaluated once per
 * compared byte and must therefore be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2048
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to the
 * context's character count and to the input's column. If the new
 * position holds a 0 byte, the input is asked to grow by INPUT_CHUNK.
 * val is evaluated three times and must be free of side effects.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (ctxt->input->cur[0] == 0)					\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2054
/*
 * SKIPL(val): advance the current input by val xmlChars one at a time,
 * keeping the line/column position exact: a '\n' increments the line and
 * resets the column to 1, any other byte increments the column. The
 * context's character count is incremented for each byte. If the final
 * position holds a 0 byte, the input is asked to grow by INPUT_CHUNK.
 *
 * val is parenthesized in the loop test so that an expression argument
 * is compared as a whole; it is re-evaluated on every iteration and must
 * be free of side effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2067
Daniel Veillarda880b122003-04-21 21:36:41 +00002068#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002069 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2070 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002071 xmlSHRINK (ctxt);
2072
2073static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2074 xmlParserInputShrink(ctxt->input);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002075 if (*ctxt->input->cur == 0)
2076 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2077}
Owen Taylor3473f882001-02-23 17:55:21 +00002078
Daniel Veillarda880b122003-04-21 21:36:41 +00002079#define GROW if ((ctxt->progressive == 0) && \
2080 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002081 xmlGROW (ctxt);
2082
2083static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002084 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2085 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2086
2087 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2088 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002089 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002090 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2091 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002092 xmlHaltParser(ctxt);
2093 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002094 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002095 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002096 if ((ctxt->input->cur > ctxt->input->end) ||
2097 (ctxt->input->cur < ctxt->input->base)) {
2098 xmlHaltParser(ctxt);
2099 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2100 return;
2101 }
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002102 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2103 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillarda880b122003-04-21 21:36:41 +00002104}
Owen Taylor3473f882001-02-23 17:55:21 +00002105
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the current context. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the current context. */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the current input by exactly one xmlChar, incrementing
 * the column and the context's character count; the line number is not
 * touched. If the new position holds a 0 byte, the input is asked to grow
 * by INPUT_CHUNK.
 * The expansion is a plain brace block, not a do { } while (0) wrapper.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->input->col++;						\
	ctxt->nbChars++;						\
	if (ctxt->input->cur[0] == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }
2117
/*
 * NEXTL(l): step over the current character, which occupies l xmlChars.
 * A '\n' at the current position increments the line and resets the
 * column to 1; anything else increments the column by one. Unlike SKIP,
 * this neither updates the character count nor grows the input.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* CUR_CHAR(l): xmlCurrentChar() on the context; l receives the length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* CUR_SCHAR(s, l): xmlStringCurrentChar() on string s; l receives the length. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store character v into buffer b at index i and
 * advance i. When l is 1, v is stored as a single xmlChar; otherwise
 * xmlCopyCharMultiByte() writes it and i advances by its return value.
 *
 * Arguments are parenthesized so that expression arguments are cast and
 * compared as a whole. The expansion is still a bare if/else statement
 * (kept for source compatibility with existing call sites), so do not use
 * it as the unbraced body of another if.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002131
2132/**
2133 * xmlSkipBlankChars:
2134 * @ctxt: the XML parser context
2135 *
2136 * skip all blanks character found at that point in the input streams.
2137 * It pops up finished entities in the process if allowable at that point.
2138 *
2139 * Returns the number of space chars skipped
2140 */
2141
2142int
2143xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002144 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002145
2146 /*
2147 * It's Okay to use CUR/NEXT here since all the blanks are on
2148 * the ASCII range.
2149 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002150 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2151 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002152 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002153 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002154 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002155 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002156 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002157 if (*cur == '\n') {
2158 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002159 } else {
2160 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002161 }
2162 cur++;
2163 res++;
2164 if (*cur == 0) {
2165 ctxt->input->cur = cur;
2166 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2167 cur = ctxt->input->cur;
2168 }
2169 }
2170 ctxt->input->cur = cur;
2171 } else {
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002172 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2173
2174 while (1) {
2175 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002176 NEXT;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002177 } else if (CUR == '%') {
2178 /*
2179 * Need to handle support of entities branching here
2180 */
2181 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2182 break;
2183 xmlParsePEReference(ctxt);
2184 } else if (CUR == 0) {
2185 if (ctxt->inputNr <= 1)
2186 break;
2187 xmlPopInput(ctxt);
2188 } else {
2189 break;
2190 }
Nick Wellnhofer872fea92017-06-19 00:24:12 +02002191
2192 /*
2193 * Also increase the counter when entering or exiting a PERef.
2194 * The spec says: "When a parameter-entity reference is recognized
2195 * in the DTD and included, its replacement text MUST be enlarged
2196 * by the attachment of one leading and one following space (#x20)
2197 * character."
2198 */
2199 res++;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002200 }
Daniel Veillard02141ea2001-04-30 11:46:40 +00002201 }
Owen Taylor3473f882001-02-23 17:55:21 +00002202 return(res);
2203}
2204
2205/************************************************************************
2206 * *
2207 * Commodity functions to handle entities *
2208 * *
2209 ************************************************************************/
2210
2211/**
2212 * xmlPopInput:
2213 * @ctxt: an XML parser context
2214 *
2215 * xmlPopInput: the current input pointed by ctxt->input came to an end
2216 * pop it and return the next char.
2217 *
2218 * Returns the current xmlChar in the parser context
2219 */
2220xmlChar
2221xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002222 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002223 if (xmlParserDebugEntities)
2224 xmlGenericError(xmlGenericErrorContext,
2225 "Popping input %d\n", ctxt->inputNr);
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02002226 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2227 (ctxt->instate != XML_PARSER_EOF))
2228 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2229 "Unfinished entity outside the DTD");
Owen Taylor3473f882001-02-23 17:55:21 +00002230 xmlFreeInputStream(inputPop(ctxt));
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002231 if (*ctxt->input->cur == 0)
2232 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00002233 return(CUR);
2234}
2235
2236/**
2237 * xmlPushInput:
2238 * @ctxt: an XML parser context
2239 * @input: an XML parser input fragment (entity, XML fragment ...).
2240 *
2241 * xmlPushInput: switch to a new input stream which is stacked on top
2242 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002243 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002244 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002245int
Owen Taylor3473f882001-02-23 17:55:21 +00002246xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002247 int ret;
2248 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002249
2250 if (xmlParserDebugEntities) {
2251 if ((ctxt->input != NULL) && (ctxt->input->filename))
2252 xmlGenericError(xmlGenericErrorContext,
2253 "%s(%d): ", ctxt->input->filename,
2254 ctxt->input->line);
2255 xmlGenericError(xmlGenericErrorContext,
2256 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2257 }
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02002258 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2259 (ctxt->inputNr > 1024)) {
2260 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2261 while (ctxt->inputNr > 1)
2262 xmlFreeInputStream(inputPop(ctxt));
2263 return(-1);
2264 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002265 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002266 if (ctxt->instate == XML_PARSER_EOF)
2267 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002268 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002269 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002270}
2271
2272/**
2273 * xmlParseCharRef:
2274 * @ctxt: an XML parser context
2275 *
2276 * parse Reference declarations
2277 *
2278 * [66] CharRef ::= '&#' [0-9]+ ';' |
2279 * '&#x' [0-9a-fA-F]+ ';'
2280 *
2281 * [ WFC: Legal Character ]
2282 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002283 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002284 *
2285 * Returns the value parsed (as an int), 0 in case of error
2286 */
2287int
2288xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002289 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002290 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002291 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002292
Owen Taylor3473f882001-02-23 17:55:21 +00002293 /*
2294 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2295 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002296 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002297 (NXT(2) == 'x')) {
2298 SKIP(3);
2299 GROW;
2300 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002301 if (count++ > 20) {
2302 count = 0;
2303 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002304 if (ctxt->instate == XML_PARSER_EOF)
2305 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002306 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002307 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002308 val = val * 16 + (CUR - '0');
2309 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2310 val = val * 16 + (CUR - 'a') + 10;
2311 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2312 val = val * 16 + (CUR - 'A') + 10;
2313 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002314 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002315 val = 0;
2316 break;
2317 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002318 if (val > 0x10FFFF)
2319 outofrange = val;
2320
Owen Taylor3473f882001-02-23 17:55:21 +00002321 NEXT;
2322 count++;
2323 }
2324 if (RAW == ';') {
2325 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002326 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002327 ctxt->nbChars ++;
2328 ctxt->input->cur++;
2329 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002330 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002331 SKIP(2);
2332 GROW;
2333 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002334 if (count++ > 20) {
2335 count = 0;
2336 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002337 if (ctxt->instate == XML_PARSER_EOF)
2338 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002339 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002340 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002341 val = val * 10 + (CUR - '0');
2342 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002343 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002344 val = 0;
2345 break;
2346 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002347 if (val > 0x10FFFF)
2348 outofrange = val;
2349
Owen Taylor3473f882001-02-23 17:55:21 +00002350 NEXT;
2351 count++;
2352 }
2353 if (RAW == ';') {
2354 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002355 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002356 ctxt->nbChars ++;
2357 ctxt->input->cur++;
2358 }
2359 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002360 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002361 }
2362
2363 /*
2364 * [ WFC: Legal Character ]
2365 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002366 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002367 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002368 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002369 return(val);
2370 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002371 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2372 "xmlParseCharRef: invalid xmlChar value %d\n",
2373 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002374 }
2375 return(0);
2376}
2377
2378/**
2379 * xmlParseStringCharRef:
2380 * @ctxt: an XML parser context
2381 * @str: a pointer to an index in the string
2382 *
2383 * parse Reference declarations, variant parsing from a string rather
2384 * than an an input flow.
2385 *
2386 * [66] CharRef ::= '&#' [0-9]+ ';' |
2387 * '&#x' [0-9a-fA-F]+ ';'
2388 *
2389 * [ WFC: Legal Character ]
2390 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002391 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002392 *
2393 * Returns the value parsed (as an int), 0 in case of error, str will be
2394 * updated to the current value of the index
2395 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002396static int
Owen Taylor3473f882001-02-23 17:55:21 +00002397xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2398 const xmlChar *ptr;
2399 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002400 unsigned int val = 0;
2401 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002402
2403 if ((str == NULL) || (*str == NULL)) return(0);
2404 ptr = *str;
2405 cur = *ptr;
2406 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2407 ptr += 3;
2408 cur = *ptr;
2409 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002410 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002411 val = val * 16 + (cur - '0');
2412 else if ((cur >= 'a') && (cur <= 'f'))
2413 val = val * 16 + (cur - 'a') + 10;
2414 else if ((cur >= 'A') && (cur <= 'F'))
2415 val = val * 16 + (cur - 'A') + 10;
2416 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002417 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002418 val = 0;
2419 break;
2420 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002421 if (val > 0x10FFFF)
2422 outofrange = val;
2423
Owen Taylor3473f882001-02-23 17:55:21 +00002424 ptr++;
2425 cur = *ptr;
2426 }
2427 if (cur == ';')
2428 ptr++;
2429 } else if ((cur == '&') && (ptr[1] == '#')){
2430 ptr += 2;
2431 cur = *ptr;
2432 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002433 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002434 val = val * 10 + (cur - '0');
2435 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002436 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002437 val = 0;
2438 break;
2439 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002440 if (val > 0x10FFFF)
2441 outofrange = val;
2442
Owen Taylor3473f882001-02-23 17:55:21 +00002443 ptr++;
2444 cur = *ptr;
2445 }
2446 if (cur == ';')
2447 ptr++;
2448 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002449 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002450 return(0);
2451 }
2452 *str = ptr;
2453
2454 /*
2455 * [ WFC: Legal Character ]
2456 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002457 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002458 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002459 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002460 return(val);
2461 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002462 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2463 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2464 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002465 }
2466 return(0);
2467}
2468
2469/**
2470 * xmlParserHandlePEReference:
2471 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002472 *
Owen Taylor3473f882001-02-23 17:55:21 +00002473 * [69] PEReference ::= '%' Name ';'
2474 *
2475 * [ WFC: No Recursion ]
2476 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002477 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002478 *
2479 * [ WFC: Entity Declared ]
2480 * In a document without any DTD, a document with only an internal DTD
2481 * subset which contains no parameter entity references, or a document
2482 * with "standalone='yes'", ... ... The declaration of a parameter
2483 * entity must precede any reference to it...
2484 *
2485 * [ VC: Entity Declared ]
2486 * In a document with an external subset or external parameter entities
2487 * with "standalone='no'", ... ... The declaration of a parameter entity
2488 * must precede any reference to it...
2489 *
2490 * [ WFC: In DTD ]
2491 * Parameter-entity references may only appear in the DTD.
2492 * NOTE: misleading but this is handled.
2493 *
2494 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002495 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002496 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002497 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002498 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002499 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002500 */
2501void
2502xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00002503 switch(ctxt->instate) {
2504 case XML_PARSER_CDATA_SECTION:
2505 return;
2506 case XML_PARSER_COMMENT:
2507 return;
2508 case XML_PARSER_START_TAG:
2509 return;
2510 case XML_PARSER_END_TAG:
2511 return;
2512 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002513 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002514 return;
2515 case XML_PARSER_PROLOG:
2516 case XML_PARSER_START:
2517 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002518 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002519 return;
2520 case XML_PARSER_ENTITY_DECL:
2521 case XML_PARSER_CONTENT:
2522 case XML_PARSER_ATTRIBUTE_VALUE:
2523 case XML_PARSER_PI:
2524 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002525 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002526 /* we just ignore it there */
2527 return;
2528 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002529 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002530 return;
2531 case XML_PARSER_ENTITY_VALUE:
2532 /*
2533 * NOTE: in the case of entity values, we don't do the
2534 * substitution here since we need the literal
2535 * entity value to be able to save the internal
2536 * subset of the document.
2537 * This will be handled by xmlStringDecodeEntities
2538 */
2539 return;
2540 case XML_PARSER_DTD:
2541 /*
2542 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2543 * In the internal DTD subset, parameter-entity references
2544 * can occur only where markup declarations can occur, not
2545 * within markup declarations.
2546 * In that case this is handled in xmlParseMarkupDecl
2547 */
2548 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2549 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002550 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002551 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002552 break;
2553 case XML_PARSER_IGNORE:
2554 return;
2555 }
2556
Nick Wellnhofer03904152017-06-05 21:16:00 +02002557 xmlParsePEReference(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00002558}
2559
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current one plus n.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * jumped to when the new size comes out smaller than the current one
 * or when xmlRealloc() fails, in which case neither buffer nor
 * buffer##_size is modified.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp = NULL;                                                \
    size_t new_size = buffer##_size * 2 + n;                            \
    if (new_size >= buffer##_size)                                      \
        tmp = (xmlChar *) xmlRealloc(buffer, new_size);                 \
    if (tmp == NULL) goto mem_error;                                    \
    buffer##_size = new_size;                                           \
    buffer = tmp;                                                       \
}
2574
2575/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002576 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002577 * @ctxt: the parser context
2578 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002579 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002580 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2581 * @end: an end marker xmlChar, 0 if none
2582 * @end2: an end marker xmlChar, 0 if none
2583 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002584 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002585 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002586 *
2587 * [67] Reference ::= EntityRef | CharRef
2588 *
2589 * [69] PEReference ::= '%' Name ';'
2590 *
2591 * Returns A newly allocated string with the substitution done. The caller
2592 * must deallocate it !
2593 */
2594xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002595xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2596 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002597 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002598 size_t buffer_size = 0;
2599 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002600
2601 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002602 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002603 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002604 xmlEntityPtr ent;
2605 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002606
Daniel Veillarda82b1822004-11-08 16:24:57 +00002607 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002608 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002609 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002610
Daniel Veillard0161e632008-08-28 15:36:32 +00002611 if (((ctxt->depth > 40) &&
2612 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2613 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002614 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002615 return(NULL);
2616 }
2617
2618 /*
2619 * allocate a translation buffer.
2620 */
2621 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002622 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002623 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002624
2625 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002626 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002627 * we are operating on already parsed values.
2628 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002629 if (str < last)
2630 c = CUR_SCHAR(str, l);
2631 else
2632 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002633 while ((c != 0) && (c != end) && /* non input consuming loop */
2634 (c != end2) && (c != end3)) {
2635
2636 if (c == 0) break;
2637 if ((c == '&') && (str[1] == '#')) {
2638 int val = xmlParseStringCharRef(ctxt, &str);
2639 if (val != 0) {
2640 COPY_BUF(0,buffer,nbchars,val);
2641 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002642 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002643 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002644 }
Owen Taylor3473f882001-02-23 17:55:21 +00002645 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2646 if (xmlParserDebugEntities)
2647 xmlGenericError(xmlGenericErrorContext,
2648 "String decoding Entity Reference: %.30s\n",
2649 str);
2650 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002651 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2652 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002653 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002654 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002655 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002656 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002657 if ((ent != NULL) &&
2658 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2659 if (ent->content != NULL) {
2660 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002661 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002662 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002663 }
Owen Taylor3473f882001-02-23 17:55:21 +00002664 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002665 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2666 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002667 }
2668 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002669 ctxt->depth++;
2670 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2671 0, 0, 0);
2672 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002673
David Drysdale69030712015-11-20 11:13:45 +08002674 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2675 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2676 goto int_error;
2677
Owen Taylor3473f882001-02-23 17:55:21 +00002678 if (rep != NULL) {
2679 current = rep;
2680 while (*current != 0) { /* non input consuming loop */
2681 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002682 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002683 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002684 goto int_error;
2685 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002686 }
2687 }
2688 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002689 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002690 }
2691 } else if (ent != NULL) {
2692 int i = xmlStrlen(ent->name);
2693 const xmlChar *cur = ent->name;
2694
2695 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002696 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002697 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002698 }
2699 for (;i > 0;i--)
2700 buffer[nbchars++] = *cur++;
2701 buffer[nbchars++] = ';';
2702 }
2703 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2704 if (xmlParserDebugEntities)
2705 xmlGenericError(xmlGenericErrorContext,
2706 "String decoding PE Reference: %.30s\n", str);
2707 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002708 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2709 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002710 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002711 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002712 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002713 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002714 if (ent->content == NULL) {
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002715 /*
2716 * Note: external parsed entities will not be loaded,
2717 * it is not required for a non-validating parser to
2718 * complete external PEreferences coming from the
2719 * internal subset
2720 */
2721 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2722 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2723 (ctxt->validate != 0)) {
2724 xmlLoadEntityContent(ctxt, ent);
2725 } else {
2726 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2727 "not validating will not read content for PE entity %s\n",
2728 ent->name, NULL);
2729 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002730 }
Owen Taylor3473f882001-02-23 17:55:21 +00002731 ctxt->depth++;
2732 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2733 0, 0, 0);
2734 ctxt->depth--;
2735 if (rep != NULL) {
2736 current = rep;
2737 while (*current != 0) { /* non input consuming loop */
2738 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002739 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002740 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002741 goto int_error;
2742 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002743 }
2744 }
2745 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002746 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002747 }
2748 }
2749 } else {
2750 COPY_BUF(l,buffer,nbchars,c);
2751 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002752 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2753 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002754 }
2755 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002756 if (str < last)
2757 c = CUR_SCHAR(str, l);
2758 else
2759 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002760 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002761 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002762 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002763
2764mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002765 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002766int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002767 if (rep != NULL)
2768 xmlFree(rep);
2769 if (buffer != NULL)
2770 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002771 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002772}
2773
Daniel Veillarde57ec792003-09-10 10:50:59 +00002774/**
2775 * xmlStringDecodeEntities:
2776 * @ctxt: the parser context
2777 * @str: the input string
2778 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2779 * @end: an end marker xmlChar, 0 if none
2780 * @end2: an end marker xmlChar, 0 if none
2781 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002782 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002783 * Takes a entity string content and process to do the adequate substitutions.
2784 *
2785 * [67] Reference ::= EntityRef | CharRef
2786 *
2787 * [69] PEReference ::= '%' Name ';'
2788 *
2789 * Returns A newly allocated string with the substitution done. The caller
2790 * must deallocate it !
2791 */
2792xmlChar *
2793xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2794 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002795 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002796 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2797 end, end2, end3));
2798}
Owen Taylor3473f882001-02-23 17:55:21 +00002799
2800/************************************************************************
2801 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002802 * Commodity functions, cleanup needed ? *
2803 * *
2804 ************************************************************************/
2805
2806/**
2807 * areBlanks:
2808 * @ctxt: an XML parser context
2809 * @str: a xmlChar *
2810 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002811 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002812 *
2813 * Is this a sequence of blank chars that one can ignore ?
2814 *
2815 * Returns 1 if ignorable 0 otherwise.
2816 */
2817
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002818static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2819 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002820 int i, ret;
2821 xmlNodePtr lastChild;
2822
Daniel Veillard05c13a22001-09-09 08:38:09 +00002823 /*
2824 * Don't spend time trying to differentiate them, the same callback is
2825 * used !
2826 */
2827 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002828 return(0);
2829
Owen Taylor3473f882001-02-23 17:55:21 +00002830 /*
2831 * Check for xml:space value.
2832 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002833 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2834 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002835 return(0);
2836
2837 /*
2838 * Check that the string is made of blanks
2839 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002840 if (blank_chars == 0) {
2841 for (i = 0;i < len;i++)
2842 if (!(IS_BLANK_CH(str[i]))) return(0);
2843 }
Owen Taylor3473f882001-02-23 17:55:21 +00002844
2845 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002846 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002847 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002848 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002849 if (ctxt->myDoc != NULL) {
2850 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2851 if (ret == 0) return(1);
2852 if (ret == 1) return(0);
2853 }
2854
2855 /*
2856 * Otherwise, heuristic :-\
2857 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002858 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002859 if ((ctxt->node->children == NULL) &&
2860 (RAW == '<') && (NXT(1) == '/')) return(0);
2861
2862 lastChild = xmlGetLastChild(ctxt->node);
2863 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002864 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2865 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002866 } else if (xmlNodeIsText(lastChild))
2867 return(0);
2868 else if ((ctxt->node->children != NULL) &&
2869 (xmlNodeIsText(ctxt->node->children)))
2870 return(0);
2871 return(1);
2872}
2873
Owen Taylor3473f882001-02-23 17:55:21 +00002874/************************************************************************
2875 * *
2876 * Extra stuff for namespace support *
2877 * Relates to http://www.w3.org/TR/WD-xml-names *
2878 * *
2879 ************************************************************************/
2880
2881/**
2882 * xmlSplitQName:
2883 * @ctxt: an XML parser context
2884 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002885 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002886 *
2887 * parse an UTF8 encoded XML qualified name string
2888 *
2889 * [NS 5] QName ::= (Prefix ':')? LocalPart
2890 *
2891 * [NS 6] Prefix ::= NCName
2892 *
2893 * [NS 7] LocalPart ::= NCName
2894 *
2895 * Returns the local part, and prefix is updated
2896 * to get the Prefix if any.
2897 */
2898
2899xmlChar *
2900xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2901 xmlChar buf[XML_MAX_NAMELEN + 5];
2902 xmlChar *buffer = NULL;
2903 int len = 0;
2904 int max = XML_MAX_NAMELEN;
2905 xmlChar *ret = NULL;
2906 const xmlChar *cur = name;
2907 int c;
2908
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002909 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002910 *prefix = NULL;
2911
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002912 if (cur == NULL) return(NULL);
2913
Owen Taylor3473f882001-02-23 17:55:21 +00002914#ifndef XML_XML_NAMESPACE
2915 /* xml: prefix is not really a namespace */
2916 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2917 (cur[2] == 'l') && (cur[3] == ':'))
2918 return(xmlStrdup(name));
2919#endif
2920
Daniel Veillard597bc482003-07-24 16:08:28 +00002921 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002922 if (cur[0] == ':')
2923 return(xmlStrdup(name));
2924
2925 c = *cur++;
2926 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2927 buf[len++] = c;
2928 c = *cur++;
2929 }
2930 if (len >= max) {
2931 /*
2932 * Okay someone managed to make a huge name, so he's ready to pay
2933 * for the processing speed.
2934 */
2935 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002936
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002937 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002938 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002939 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002940 return(NULL);
2941 }
2942 memcpy(buffer, buf, len);
2943 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2944 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002945 xmlChar *tmp;
2946
Owen Taylor3473f882001-02-23 17:55:21 +00002947 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002948 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002949 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002950 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002951 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002952 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002953 return(NULL);
2954 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002955 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002956 }
2957 buffer[len++] = c;
2958 c = *cur++;
2959 }
2960 buffer[len] = 0;
2961 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002962
Daniel Veillard597bc482003-07-24 16:08:28 +00002963 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002964 if (buffer != NULL)
2965 xmlFree(buffer);
2966 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002967 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002968 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002969
Owen Taylor3473f882001-02-23 17:55:21 +00002970 if (buffer == NULL)
2971 ret = xmlStrndup(buf, len);
2972 else {
2973 ret = buffer;
2974 buffer = NULL;
2975 max = XML_MAX_NAMELEN;
2976 }
2977
2978
2979 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002980 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002981 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002982 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002983 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002984 }
Owen Taylor3473f882001-02-23 17:55:21 +00002985 len = 0;
2986
Daniel Veillardbb284f42002-10-16 18:02:47 +00002987 /*
2988 * Check that the first character is proper to start
2989 * a new name
2990 */
2991 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2992 ((c >= 0x41) && (c <= 0x5A)) ||
2993 (c == '_') || (c == ':'))) {
2994 int l;
2995 int first = CUR_SCHAR(cur, l);
2996
2997 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002998 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002999 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003000 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003001 }
3002 }
3003 cur++;
3004
Owen Taylor3473f882001-02-23 17:55:21 +00003005 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3006 buf[len++] = c;
3007 c = *cur++;
3008 }
3009 if (len >= max) {
3010 /*
3011 * Okay someone managed to make a huge name, so he's ready to pay
3012 * for the processing speed.
3013 */
3014 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003015
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003016 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003017 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003018 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003019 return(NULL);
3020 }
3021 memcpy(buffer, buf, len);
3022 while (c != 0) { /* tested bigname2.xml */
3023 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003024 xmlChar *tmp;
3025
Owen Taylor3473f882001-02-23 17:55:21 +00003026 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003027 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003028 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003029 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003030 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003031 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003032 return(NULL);
3033 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003034 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003035 }
3036 buffer[len++] = c;
3037 c = *cur++;
3038 }
3039 buffer[len] = 0;
3040 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003041
Owen Taylor3473f882001-02-23 17:55:21 +00003042 if (buffer == NULL)
3043 ret = xmlStrndup(buf, len);
3044 else {
3045 ret = buffer;
3046 }
3047 }
3048
3049 return(ret);
3050}
3051
3052/************************************************************************
3053 * *
3054 * The parser itself *
3055 * Relates to http://www.w3.org/TR/REC-xml *
3056 * *
3057 ************************************************************************/
3058
Daniel Veillard34e3f642008-07-29 09:02:27 +00003059/************************************************************************
3060 * *
3061 * Routines to parse Name, NCName and NmToken *
3062 * *
3063 ************************************************************************/
#ifdef DEBUG
/*
 * Per-routine call counters, compiled in only when DEBUG is defined.
 * Each name parsing routine bumps its own counter on entry.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3072
Daniel Veillard34e3f642008-07-29 09:02:27 +00003073/*
3074 * The two following functions are related to the change of accepted
3075 * characters for Name and NmToken in the Revision 5 of XML-1.0
3076 * They correspond to the modified production [4] and the new production [4a]
3077 * changes in that revision. Also note that the macros used for the
3078 * productions Letter, Digit, CombiningChar and Extender are not needed
3079 * anymore.
3080 * We still keep compatibility to pre-revision5 parsing semantic if the
3081 * new XML_PARSE_OLD10 option is given to the parser.
3082 */
3083static int
3084xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3085 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3086 /*
3087 * Use the new checks of production [4] [4a] amd [5] of the
3088 * Update 5 of XML-1.0
3089 */
3090 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3091 (((c >= 'a') && (c <= 'z')) ||
3092 ((c >= 'A') && (c <= 'Z')) ||
3093 (c == '_') || (c == ':') ||
3094 ((c >= 0xC0) && (c <= 0xD6)) ||
3095 ((c >= 0xD8) && (c <= 0xF6)) ||
3096 ((c >= 0xF8) && (c <= 0x2FF)) ||
3097 ((c >= 0x370) && (c <= 0x37D)) ||
3098 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3099 ((c >= 0x200C) && (c <= 0x200D)) ||
3100 ((c >= 0x2070) && (c <= 0x218F)) ||
3101 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3102 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3103 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3104 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3105 ((c >= 0x10000) && (c <= 0xEFFFF))))
3106 return(1);
3107 } else {
3108 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3109 return(1);
3110 }
3111 return(0);
3112}
3113
3114static int
3115xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3116 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3117 /*
3118 * Use the new checks of production [4] [4a] amd [5] of the
3119 * Update 5 of XML-1.0
3120 */
3121 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3122 (((c >= 'a') && (c <= 'z')) ||
3123 ((c >= 'A') && (c <= 'Z')) ||
3124 ((c >= '0') && (c <= '9')) || /* !start */
3125 (c == '_') || (c == ':') ||
3126 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3127 ((c >= 0xC0) && (c <= 0xD6)) ||
3128 ((c >= 0xD8) && (c <= 0xF6)) ||
3129 ((c >= 0xF8) && (c <= 0x2FF)) ||
3130 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3131 ((c >= 0x370) && (c <= 0x37D)) ||
3132 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3133 ((c >= 0x200C) && (c <= 0x200D)) ||
3134 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3135 ((c >= 0x2070) && (c <= 0x218F)) ||
3136 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3137 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3138 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3139 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3140 ((c >= 0x10000) && (c <= 0xEFFFF))))
3141 return(1);
3142 } else {
3143 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3144 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003145 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003146 (IS_COMBINING(c)) ||
3147 (IS_EXTENDER(c)))
3148 return(1);
3149 }
3150 return(0);
3151}
3152
Daniel Veillarde57ec792003-09-10 10:50:59 +00003153static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003154 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003155
Daniel Veillard34e3f642008-07-29 09:02:27 +00003156static const xmlChar *
3157xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3158 int len = 0, l;
3159 int c;
3160 int count = 0;
3161
Daniel Veillardc6561462009-03-25 10:22:31 +00003162#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003163 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003164#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003165
3166 /*
3167 * Handler for more complex cases
3168 */
3169 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003170 if (ctxt->instate == XML_PARSER_EOF)
3171 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003172 c = CUR_CHAR(l);
3173 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3174 /*
3175 * Use the new checks of production [4] [4a] amd [5] of the
3176 * Update 5 of XML-1.0
3177 */
3178 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3179 (!(((c >= 'a') && (c <= 'z')) ||
3180 ((c >= 'A') && (c <= 'Z')) ||
3181 (c == '_') || (c == ':') ||
3182 ((c >= 0xC0) && (c <= 0xD6)) ||
3183 ((c >= 0xD8) && (c <= 0xF6)) ||
3184 ((c >= 0xF8) && (c <= 0x2FF)) ||
3185 ((c >= 0x370) && (c <= 0x37D)) ||
3186 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3187 ((c >= 0x200C) && (c <= 0x200D)) ||
3188 ((c >= 0x2070) && (c <= 0x218F)) ||
3189 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3190 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3191 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3192 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3193 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3194 return(NULL);
3195 }
3196 len += l;
3197 NEXTL(l);
3198 c = CUR_CHAR(l);
3199 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3200 (((c >= 'a') && (c <= 'z')) ||
3201 ((c >= 'A') && (c <= 'Z')) ||
3202 ((c >= '0') && (c <= '9')) || /* !start */
3203 (c == '_') || (c == ':') ||
3204 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3205 ((c >= 0xC0) && (c <= 0xD6)) ||
3206 ((c >= 0xD8) && (c <= 0xF6)) ||
3207 ((c >= 0xF8) && (c <= 0x2FF)) ||
3208 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3209 ((c >= 0x370) && (c <= 0x37D)) ||
3210 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3211 ((c >= 0x200C) && (c <= 0x200D)) ||
3212 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3213 ((c >= 0x2070) && (c <= 0x218F)) ||
3214 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3215 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3216 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3217 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3218 ((c >= 0x10000) && (c <= 0xEFFFF))
3219 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003220 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003221 count = 0;
3222 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003223 if (ctxt->instate == XML_PARSER_EOF)
3224 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003225 }
3226 len += l;
3227 NEXTL(l);
3228 c = CUR_CHAR(l);
3229 }
3230 } else {
3231 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3232 (!IS_LETTER(c) && (c != '_') &&
3233 (c != ':'))) {
3234 return(NULL);
3235 }
3236 len += l;
3237 NEXTL(l);
3238 c = CUR_CHAR(l);
3239
3240 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3241 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3242 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003243 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003244 (IS_COMBINING(c)) ||
3245 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003246 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003247 count = 0;
3248 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003249 if (ctxt->instate == XML_PARSER_EOF)
3250 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003251 }
3252 len += l;
3253 NEXTL(l);
3254 c = CUR_CHAR(l);
3255 }
3256 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003257 if ((len > XML_MAX_NAME_LENGTH) &&
3258 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3259 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3260 return(NULL);
3261 }
Nick Wellnhofere2663052017-06-05 15:37:17 +02003262 if (ctxt->input->cur - ctxt->input->base < len) {
3263 /*
3264 * There were a couple of bugs where PERefs lead to to a change
3265 * of the buffer. Check the buffer size to avoid passing an invalid
3266 * pointer to xmlDictLookup.
3267 */
3268 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3269 "unexpected change of input buffer");
3270 return (NULL);
3271 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003272 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3273 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3274 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3275}
3276
Owen Taylor3473f882001-02-23 17:55:21 +00003277/**
3278 * xmlParseName:
3279 * @ctxt: an XML parser context
3280 *
3281 * parse an XML name.
3282 *
3283 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3284 * CombiningChar | Extender
3285 *
3286 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3287 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003288 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003289 *
3290 * Returns the Name parsed or NULL
3291 */
3292
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003293const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003294xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003295 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003296 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003297 int count = 0;
3298
3299 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003300
Daniel Veillardc6561462009-03-25 10:22:31 +00003301#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003302 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003303#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003304
Daniel Veillard48b2f892001-02-25 16:11:03 +00003305 /*
3306 * Accelerator for simple ASCII names
3307 */
3308 in = ctxt->input->cur;
3309 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3310 ((*in >= 0x41) && (*in <= 0x5A)) ||
3311 (*in == '_') || (*in == ':')) {
3312 in++;
3313 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3314 ((*in >= 0x41) && (*in <= 0x5A)) ||
3315 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003316 (*in == '_') || (*in == '-') ||
3317 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003318 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003319 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003320 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003321 if ((count > XML_MAX_NAME_LENGTH) &&
3322 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3323 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3324 return(NULL);
3325 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003326 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003327 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003328 ctxt->nbChars += count;
3329 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003330 if (ret == NULL)
3331 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003332 return(ret);
3333 }
3334 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003335 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003336 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003337}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003338
Daniel Veillard34e3f642008-07-29 09:02:27 +00003339static const xmlChar *
3340xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3341 int len = 0, l;
3342 int c;
3343 int count = 0;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003344 size_t startPosition = 0;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003345
Daniel Veillardc6561462009-03-25 10:22:31 +00003346#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003347 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003348#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003349
3350 /*
3351 * Handler for more complex cases
3352 */
3353 GROW;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003354 startPosition = CUR_PTR - BASE_PTR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003355 c = CUR_CHAR(l);
3356 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3357 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3358 return(NULL);
3359 }
3360
3361 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3362 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003363 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003364 if ((len > XML_MAX_NAME_LENGTH) &&
3365 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3366 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3367 return(NULL);
3368 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003369 count = 0;
3370 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003371 if (ctxt->instate == XML_PARSER_EOF)
3372 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003373 }
3374 len += l;
3375 NEXTL(l);
3376 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003377 if (c == 0) {
3378 count = 0;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003379 /*
3380 * when shrinking to extend the buffer we really need to preserve
3381 * the part of the name we already parsed. Hence rolling back
3382 * by current lenght.
3383 */
3384 ctxt->input->cur -= l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003385 GROW;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003386 ctxt->input->cur += l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003387 if (ctxt->instate == XML_PARSER_EOF)
3388 return(NULL);
3389 c = CUR_CHAR(l);
3390 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003391 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003392 if ((len > XML_MAX_NAME_LENGTH) &&
3393 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3394 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3395 return(NULL);
3396 }
Pranjal Jumde45752d22016-03-03 11:50:34 -08003397 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003398}
3399
3400/**
3401 * xmlParseNCName:
3402 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003403 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003404 *
3405 * parse an XML name.
3406 *
3407 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3408 * CombiningChar | Extender
3409 *
3410 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3411 *
3412 * Returns the Name parsed or NULL
3413 */
3414
3415static const xmlChar *
3416xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003417 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003418 const xmlChar *ret;
3419 int count = 0;
3420
Daniel Veillardc6561462009-03-25 10:22:31 +00003421#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003422 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003423#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003424
3425 /*
3426 * Accelerator for simple ASCII names
3427 */
3428 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003429 e = ctxt->input->end;
3430 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3431 ((*in >= 0x41) && (*in <= 0x5A)) ||
3432 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003433 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003434 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3435 ((*in >= 0x41) && (*in <= 0x5A)) ||
3436 ((*in >= 0x30) && (*in <= 0x39)) ||
3437 (*in == '_') || (*in == '-') ||
3438 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003439 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003440 if (in >= e)
3441 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003442 if ((*in > 0) && (*in < 0x80)) {
3443 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003444 if ((count > XML_MAX_NAME_LENGTH) &&
3445 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3446 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3447 return(NULL);
3448 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003449 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3450 ctxt->input->cur = in;
3451 ctxt->nbChars += count;
3452 ctxt->input->col += count;
3453 if (ret == NULL) {
3454 xmlErrMemory(ctxt, NULL);
3455 }
3456 return(ret);
3457 }
3458 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003459complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003460 return(xmlParseNCNameComplex(ctxt));
3461}
3462
Daniel Veillard46de64e2002-05-29 08:21:33 +00003463/**
3464 * xmlParseNameAndCompare:
3465 * @ctxt: an XML parser context
3466 *
3467 * parse an XML name and compares for match
3468 * (specialized for endtag parsing)
3469 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003470 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3471 * and the name for mismatch
3472 */
3473
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003474static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003475xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003476 register const xmlChar *cmp = other;
3477 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003478 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003479
3480 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003481 if (ctxt->instate == XML_PARSER_EOF)
3482 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003483
Daniel Veillard46de64e2002-05-29 08:21:33 +00003484 in = ctxt->input->cur;
3485 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003486 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003487 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003488 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003489 }
William M. Brack76e95df2003-10-18 16:20:14 +00003490 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003491 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003492 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003493 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003494 }
3495 /* failure (or end of input buffer), check with full function */
3496 ret = xmlParseName (ctxt);
Jan Pokornýbb654fe2016-04-13 16:56:07 +02003497 /* strings coming from the dictionary direct compare possible */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003498 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003499 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003500 }
3501 return ret;
3502}
3503
Owen Taylor3473f882001-02-23 17:55:21 +00003504/**
3505 * xmlParseStringName:
3506 * @ctxt: an XML parser context
3507 * @str: a pointer to the string pointer (IN/OUT)
3508 *
3509 * parse an XML name.
3510 *
3511 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3512 * CombiningChar | Extender
3513 *
3514 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3515 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003516 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003517 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003518 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003519 * is updated to the current location in the string.
3520 */
3521
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003522static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003523xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3524 xmlChar buf[XML_MAX_NAMELEN + 5];
3525 const xmlChar *cur = *str;
3526 int len = 0, l;
3527 int c;
3528
Daniel Veillardc6561462009-03-25 10:22:31 +00003529#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003530 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003531#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003532
Owen Taylor3473f882001-02-23 17:55:21 +00003533 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003534 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003535 return(NULL);
3536 }
3537
Daniel Veillard34e3f642008-07-29 09:02:27 +00003538 COPY_BUF(l,buf,len,c);
3539 cur += l;
3540 c = CUR_SCHAR(cur, l);
3541 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003542 COPY_BUF(l,buf,len,c);
3543 cur += l;
3544 c = CUR_SCHAR(cur, l);
3545 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3546 /*
3547 * Okay someone managed to make a huge name, so he's ready to pay
3548 * for the processing speed.
3549 */
3550 xmlChar *buffer;
3551 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003552
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003553 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003554 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003555 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003556 return(NULL);
3557 }
3558 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003559 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003560 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003561 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003562
3563 if ((len > XML_MAX_NAME_LENGTH) &&
3564 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3565 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3566 xmlFree(buffer);
3567 return(NULL);
3568 }
Owen Taylor3473f882001-02-23 17:55:21 +00003569 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003570 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003571 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003572 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003573 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003574 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003575 return(NULL);
3576 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003577 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003578 }
3579 COPY_BUF(l,buffer,len,c);
3580 cur += l;
3581 c = CUR_SCHAR(cur, l);
3582 }
3583 buffer[len] = 0;
3584 *str = cur;
3585 return(buffer);
3586 }
3587 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003588 if ((len > XML_MAX_NAME_LENGTH) &&
3589 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3590 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3591 return(NULL);
3592 }
Owen Taylor3473f882001-02-23 17:55:21 +00003593 *str = cur;
3594 return(xmlStrndup(buf, len));
3595}
3596
3597/**
3598 * xmlParseNmtoken:
3599 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003600 *
Owen Taylor3473f882001-02-23 17:55:21 +00003601 * parse an XML Nmtoken.
3602 *
3603 * [7] Nmtoken ::= (NameChar)+
3604 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003605 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003606 *
3607 * Returns the Nmtoken parsed or NULL
3608 */
3609
3610xmlChar *
3611xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3612 xmlChar buf[XML_MAX_NAMELEN + 5];
3613 int len = 0, l;
3614 int c;
3615 int count = 0;
3616
Daniel Veillardc6561462009-03-25 10:22:31 +00003617#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003618 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003619#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003620
Owen Taylor3473f882001-02-23 17:55:21 +00003621 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003622 if (ctxt->instate == XML_PARSER_EOF)
3623 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003624 c = CUR_CHAR(l);
3625
Daniel Veillard34e3f642008-07-29 09:02:27 +00003626 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003627 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003628 count = 0;
3629 GROW;
3630 }
3631 COPY_BUF(l,buf,len,c);
3632 NEXTL(l);
3633 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003634 if (c == 0) {
3635 count = 0;
3636 GROW;
3637 if (ctxt->instate == XML_PARSER_EOF)
3638 return(NULL);
3639 c = CUR_CHAR(l);
3640 }
Owen Taylor3473f882001-02-23 17:55:21 +00003641 if (len >= XML_MAX_NAMELEN) {
3642 /*
3643 * Okay someone managed to make a huge token, so he's ready to pay
3644 * for the processing speed.
3645 */
3646 xmlChar *buffer;
3647 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003648
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003649 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003650 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003651 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003652 return(NULL);
3653 }
3654 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003655 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003656 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003657 count = 0;
3658 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003659 if (ctxt->instate == XML_PARSER_EOF) {
3660 xmlFree(buffer);
3661 return(NULL);
3662 }
Owen Taylor3473f882001-02-23 17:55:21 +00003663 }
3664 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003665 xmlChar *tmp;
3666
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003667 if ((max > XML_MAX_NAME_LENGTH) &&
3668 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3669 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3670 xmlFree(buffer);
3671 return(NULL);
3672 }
Owen Taylor3473f882001-02-23 17:55:21 +00003673 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003674 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003675 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003676 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003677 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003678 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003679 return(NULL);
3680 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003681 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003682 }
3683 COPY_BUF(l,buffer,len,c);
3684 NEXTL(l);
3685 c = CUR_CHAR(l);
3686 }
3687 buffer[len] = 0;
3688 return(buffer);
3689 }
3690 }
3691 if (len == 0)
3692 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003693 if ((len > XML_MAX_NAME_LENGTH) &&
3694 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3695 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3696 return(NULL);
3697 }
Owen Taylor3473f882001-02-23 17:55:21 +00003698 return(xmlStrndup(buf, len));
3699}
3700
3701/**
3702 * xmlParseEntityValue:
3703 * @ctxt: an XML parser context
3704 * @orig: if non-NULL store a copy of the original entity value
3705 *
3706 * parse a value for ENTITY declarations
3707 *
3708 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3709 * "'" ([^%&'] | PEReference | Reference)* "'"
3710 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003711 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003712 */
3713
3714xmlChar *
3715xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3716 xmlChar *buf = NULL;
3717 int len = 0;
3718 int size = XML_PARSER_BUFFER_SIZE;
3719 int c, l;
3720 xmlChar stop;
3721 xmlChar *ret = NULL;
3722 const xmlChar *cur = NULL;
3723 xmlParserInputPtr input;
3724
3725 if (RAW == '"') stop = '"';
3726 else if (RAW == '\'') stop = '\'';
3727 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003728 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003729 return(NULL);
3730 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003731 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003732 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003733 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003734 return(NULL);
3735 }
3736
3737 /*
3738 * The content of the entity definition is copied in a buffer.
3739 */
3740
3741 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3742 input = ctxt->input;
3743 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003744 if (ctxt->instate == XML_PARSER_EOF) {
3745 xmlFree(buf);
3746 return(NULL);
3747 }
Owen Taylor3473f882001-02-23 17:55:21 +00003748 NEXT;
3749 c = CUR_CHAR(l);
3750 /*
3751 * NOTE: 4.4.5 Included in Literal
3752 * When a parameter entity reference appears in a literal entity
3753 * value, ... a single or double quote character in the replacement
3754 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003755 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003756 * In practice it means we stop the loop only when back at parsing
3757 * the initial entity and the quote is found
3758 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003759 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3760 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003761 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003762 xmlChar *tmp;
3763
Owen Taylor3473f882001-02-23 17:55:21 +00003764 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003765 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3766 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003767 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003768 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003769 return(NULL);
3770 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003771 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003772 }
3773 COPY_BUF(l,buf,len,c);
3774 NEXTL(l);
Owen Taylor3473f882001-02-23 17:55:21 +00003775
3776 GROW;
3777 c = CUR_CHAR(l);
3778 if (c == 0) {
3779 GROW;
3780 c = CUR_CHAR(l);
3781 }
3782 }
3783 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003784 if (ctxt->instate == XML_PARSER_EOF) {
3785 xmlFree(buf);
3786 return(NULL);
3787 }
Owen Taylor3473f882001-02-23 17:55:21 +00003788
3789 /*
3790 * Raise problem w.r.t. '&' and '%' being used in non-entities
3791 * reference constructs. Note Charref will be handled in
3792 * xmlStringDecodeEntities()
3793 */
3794 cur = buf;
3795 while (*cur != 0) { /* non input consuming */
3796 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3797 xmlChar *name;
3798 xmlChar tmp = *cur;
3799
3800 cur++;
3801 name = xmlParseStringName(ctxt, &cur);
3802 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003803 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003804 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003805 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003806 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003807 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3808 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003809 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003810 }
3811 if (name != NULL)
3812 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003813 if (*cur == 0)
3814 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003815 }
3816 cur++;
3817 }
3818
3819 /*
3820 * Then PEReference entities are substituted.
3821 */
3822 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003823 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003824 xmlFree(buf);
3825 } else {
3826 NEXT;
3827 /*
3828 * NOTE: 4.4.7 Bypassed
3829 * When a general entity reference appears in the EntityValue in
3830 * an entity declaration, it is bypassed and left as is.
3831 * so XML_SUBSTITUTE_REF is not set here.
3832 */
Peter Simons8f30bdf2016-04-15 11:56:55 +02003833 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003834 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3835 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02003836 --ctxt->depth;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003837 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00003838 *orig = buf;
3839 else
3840 xmlFree(buf);
3841 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003842
Owen Taylor3473f882001-02-23 17:55:21 +00003843 return(ret);
3844}
3845
3846/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003847 * xmlParseAttValueComplex:
3848 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003849 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003850 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003851 *
3852 * parse a value for an attribute, this is the fallback function
3853 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003854 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003855 *
3856 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3857 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003858static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003859xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003860 xmlChar limit = 0;
3861 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003862 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003863 size_t len = 0;
3864 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003865 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003866 xmlChar *current = NULL;
3867 xmlEntityPtr ent;
3868
Owen Taylor3473f882001-02-23 17:55:21 +00003869 if (NXT(0) == '"') {
3870 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3871 limit = '"';
3872 NEXT;
3873 } else if (NXT(0) == '\'') {
3874 limit = '\'';
3875 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3876 NEXT;
3877 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003878 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003879 return(NULL);
3880 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003881
Owen Taylor3473f882001-02-23 17:55:21 +00003882 /*
3883 * allocate a translation buffer.
3884 */
3885 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003886 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003887 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003888
3889 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003890 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003891 */
3892 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003893 while (((NXT(0) != limit) && /* checked */
3894 (IS_CHAR(c)) && (c != '<')) &&
3895 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003896 /*
3897 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3898 * special option is given
3899 */
3900 if ((len > XML_MAX_TEXT_LENGTH) &&
3901 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3902 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003903 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003904 goto mem_error;
3905 }
Owen Taylor3473f882001-02-23 17:55:21 +00003906 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003907 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003908 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003909 if (NXT(1) == '#') {
3910 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003911
Owen Taylor3473f882001-02-23 17:55:21 +00003912 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003913 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003914 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003915 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003916 }
3917 buf[len++] = '&';
3918 } else {
3919 /*
3920 * The reparsing will be done in xmlStringGetNodeList()
3921 * called by the attribute() function in SAX.c
3922 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003923 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003924 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003925 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003926 buf[len++] = '&';
3927 buf[len++] = '#';
3928 buf[len++] = '3';
3929 buf[len++] = '8';
3930 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003931 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003932 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003933 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003934 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003935 }
Owen Taylor3473f882001-02-23 17:55:21 +00003936 len += xmlCopyChar(0, &buf[len], val);
3937 }
3938 } else {
3939 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003940 ctxt->nbentities++;
3941 if (ent != NULL)
3942 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003943 if ((ent != NULL) &&
3944 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003945 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003946 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003947 }
3948 if ((ctxt->replaceEntities == 0) &&
3949 (ent->content[0] == '&')) {
3950 buf[len++] = '&';
3951 buf[len++] = '#';
3952 buf[len++] = '3';
3953 buf[len++] = '8';
3954 buf[len++] = ';';
3955 } else {
3956 buf[len++] = ent->content[0];
3957 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003958 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003959 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003960 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
Peter Simons8f30bdf2016-04-15 11:56:55 +02003961 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003962 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003963 XML_SUBSTITUTE_REF,
3964 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02003965 --ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003966 if (rep != NULL) {
3967 current = rep;
3968 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003969 if ((*current == 0xD) || (*current == 0xA) ||
3970 (*current == 0x9)) {
3971 buf[len++] = 0x20;
3972 current++;
3973 } else
3974 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003975 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003976 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003977 }
3978 }
3979 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003980 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003981 }
3982 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003983 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003984 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003985 }
Owen Taylor3473f882001-02-23 17:55:21 +00003986 if (ent->content != NULL)
3987 buf[len++] = ent->content[0];
3988 }
3989 } else if (ent != NULL) {
3990 int i = xmlStrlen(ent->name);
3991 const xmlChar *cur = ent->name;
3992
3993 /*
3994 * This may look absurd but is needed to detect
3995 * entities problems
3996 */
3997 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08003998 (ent->content != NULL) && (ent->checked == 0)) {
3999 unsigned long oldnbent = ctxt->nbentities;
4000
Peter Simons8f30bdf2016-04-15 11:56:55 +02004001 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004002 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004003 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004004 --ctxt->depth;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004005
Daniel Veillardcff25462013-03-11 15:57:55 +08004006 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004007 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004008 if (xmlStrchr(rep, '<'))
4009 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004010 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004011 rep = NULL;
4012 }
Owen Taylor3473f882001-02-23 17:55:21 +00004013 }
4014
4015 /*
4016 * Just output the reference
4017 */
4018 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004019 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004020 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004021 }
4022 for (;i > 0;i--)
4023 buf[len++] = *cur++;
4024 buf[len++] = ';';
4025 }
4026 }
4027 } else {
4028 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004029 if ((len != 0) || (!normalize)) {
4030 if ((!normalize) || (!in_space)) {
4031 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004032 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004033 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004034 }
4035 }
4036 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004037 }
4038 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004039 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004040 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004041 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004042 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004043 }
4044 }
4045 NEXTL(l);
4046 }
4047 GROW;
4048 c = CUR_CHAR(l);
4049 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004050 if (ctxt->instate == XML_PARSER_EOF)
4051 goto error;
4052
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004053 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004054 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004055 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004056 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004057 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004058 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004059 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004060 if ((c != 0) && (!IS_CHAR(c))) {
4061 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4062 "invalid character in attribute value\n");
4063 } else {
4064 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4065 "AttValue: ' expected\n");
4066 }
Owen Taylor3473f882001-02-23 17:55:21 +00004067 } else
4068 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004069
4070 /*
4071 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004072 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004073 */
4074 if (len >= INT_MAX) {
4075 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004076 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004077 goto mem_error;
4078 }
4079
4080 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004081 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004082
4083mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004084 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004085error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004086 if (buf != NULL)
4087 xmlFree(buf);
4088 if (rep != NULL)
4089 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004090 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004091}
4092
4093/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004094 * xmlParseAttValue:
4095 * @ctxt: an XML parser context
4096 *
4097 * parse a value for an attribute
4098 * Note: the parser won't do substitution of entities here, this
4099 * will be handled later in xmlStringGetNodeList
4100 *
4101 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4102 * "'" ([^<&'] | Reference)* "'"
4103 *
4104 * 3.3.3 Attribute-Value Normalization:
4105 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004106 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004107 * - a character reference is processed by appending the referenced
4108 * character to the attribute value
4109 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004110 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004111 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4112 * appending #x20 to the normalized value, except that only a single
4113 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004114 * parsed entity or the literal entity value of an internal parsed entity
4115 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004116 * If the declared value is not CDATA, then the XML processor must further
4117 * process the normalized attribute value by discarding any leading and
4118 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004119 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004120 * All attributes for which no declaration has been read should be treated
4121 * by a non-validating parser as if declared CDATA.
4122 *
4123 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4124 */
4125
4126
4127xmlChar *
4128xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004129 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004130 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004131}
4132
4133/**
Owen Taylor3473f882001-02-23 17:55:21 +00004134 * xmlParseSystemLiteral:
4135 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004136 *
Owen Taylor3473f882001-02-23 17:55:21 +00004137 * parse an XML Literal
4138 *
4139 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4140 *
4141 * Returns the SystemLiteral parsed or NULL
4142 */
4143
4144xmlChar *
4145xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4146 xmlChar *buf = NULL;
4147 int len = 0;
4148 int size = XML_PARSER_BUFFER_SIZE;
4149 int cur, l;
4150 xmlChar stop;
4151 int state = ctxt->instate;
4152 int count = 0;
4153
4154 SHRINK;
4155 if (RAW == '"') {
4156 NEXT;
4157 stop = '"';
4158 } else if (RAW == '\'') {
4159 NEXT;
4160 stop = '\'';
4161 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004162 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004163 return(NULL);
4164 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004165
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004166 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004167 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004168 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004169 return(NULL);
4170 }
4171 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4172 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004173 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004174 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004175 xmlChar *tmp;
4176
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004177 if ((size > XML_MAX_NAME_LENGTH) &&
4178 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4179 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4180 xmlFree(buf);
4181 ctxt->instate = (xmlParserInputState) state;
4182 return(NULL);
4183 }
Owen Taylor3473f882001-02-23 17:55:21 +00004184 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004185 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4186 if (tmp == NULL) {
4187 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004188 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004189 ctxt->instate = (xmlParserInputState) state;
4190 return(NULL);
4191 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004192 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004193 }
4194 count++;
4195 if (count > 50) {
4196 GROW;
4197 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004198 if (ctxt->instate == XML_PARSER_EOF) {
4199 xmlFree(buf);
4200 return(NULL);
4201 }
Owen Taylor3473f882001-02-23 17:55:21 +00004202 }
4203 COPY_BUF(l,buf,len,cur);
4204 NEXTL(l);
4205 cur = CUR_CHAR(l);
4206 if (cur == 0) {
4207 GROW;
4208 SHRINK;
4209 cur = CUR_CHAR(l);
4210 }
4211 }
4212 buf[len] = 0;
4213 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004214 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004215 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004216 } else {
4217 NEXT;
4218 }
4219 return(buf);
4220}
4221
4222/**
4223 * xmlParsePubidLiteral:
4224 * @ctxt: an XML parser context
4225 *
4226 * parse an XML public literal
4227 *
4228 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4229 *
4230 * Returns the PubidLiteral parsed or NULL.
4231 */
4232
4233xmlChar *
4234xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4235 xmlChar *buf = NULL;
4236 int len = 0;
4237 int size = XML_PARSER_BUFFER_SIZE;
4238 xmlChar cur;
4239 xmlChar stop;
4240 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004241 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004242
4243 SHRINK;
4244 if (RAW == '"') {
4245 NEXT;
4246 stop = '"';
4247 } else if (RAW == '\'') {
4248 NEXT;
4249 stop = '\'';
4250 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004251 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004252 return(NULL);
4253 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004254 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004255 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004256 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004257 return(NULL);
4258 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004259 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004260 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004261 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004262 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004263 xmlChar *tmp;
4264
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004265 if ((size > XML_MAX_NAME_LENGTH) &&
4266 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4267 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4268 xmlFree(buf);
4269 return(NULL);
4270 }
Owen Taylor3473f882001-02-23 17:55:21 +00004271 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004272 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4273 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004274 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004275 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004276 return(NULL);
4277 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004278 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004279 }
4280 buf[len++] = cur;
4281 count++;
4282 if (count > 50) {
4283 GROW;
4284 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004285 if (ctxt->instate == XML_PARSER_EOF) {
4286 xmlFree(buf);
4287 return(NULL);
4288 }
Owen Taylor3473f882001-02-23 17:55:21 +00004289 }
4290 NEXT;
4291 cur = CUR;
4292 if (cur == 0) {
4293 GROW;
4294 SHRINK;
4295 cur = CUR;
4296 }
4297 }
4298 buf[len] = 0;
4299 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004300 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004301 } else {
4302 NEXT;
4303 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004304 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004305 return(buf);
4306}
4307
Daniel Veillard8ed10722009-08-20 19:17:36 +02004308static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004309
/*
 * used for the test in the inner loop of the char data testing
 *
 * Lookup table indexed by byte value. An entry holds the byte itself for
 * 0x09 and for 0x20..0x7F except '&' (0x26), '<' (0x3C) and ']' (0x5D);
 * every other entry (remaining control bytes including LF and CR, and
 * all bytes >= 0x80) is zero.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: control characters, only 0x9 is kept (CR/LF handled apart) */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) zeroed */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) zeroed */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) zeroed */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4347
Owen Taylor3473f882001-02-23 17:55:21 +00004348/**
4349 * xmlParseCharData:
4350 * @ctxt: an XML parser context
4351 * @cdata: int indicating whether we are within a CDATA section
4352 *
4353 * parse a CharData section.
4354 * if we are within a CDATA section ']]>' marks an end of section.
4355 *
4356 * The right angle bracket (>) may be represented using the string "&gt;",
4357 * and must, for compatibility, be escaped using "&gt;" or a character
4358 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004359 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004360 *
4361 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4362 */
4363
4364void
4365xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004366 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004367 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004368 int line = ctxt->input->line;
4369 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004370 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004371
4372 SHRINK;
4373 GROW;
4374 /*
4375 * Accelerated common case where input don't need to be
4376 * modified before passing it to the handler.
4377 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004378 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004379 in = ctxt->input->cur;
4380 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004381get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004382 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004383 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004384 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004385 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004386 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004387 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004388 goto get_more_space;
4389 }
4390 if (*in == '<') {
4391 nbchar = in - ctxt->input->cur;
4392 if (nbchar > 0) {
4393 const xmlChar *tmp = ctxt->input->cur;
4394 ctxt->input->cur = in;
4395
Daniel Veillard34099b42004-11-04 17:34:35 +00004396 if ((ctxt->sax != NULL) &&
4397 (ctxt->sax->ignorableWhitespace !=
4398 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004399 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004400 if (ctxt->sax->ignorableWhitespace != NULL)
4401 ctxt->sax->ignorableWhitespace(ctxt->userData,
4402 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004403 } else {
4404 if (ctxt->sax->characters != NULL)
4405 ctxt->sax->characters(ctxt->userData,
4406 tmp, nbchar);
4407 if (*ctxt->space == -1)
4408 *ctxt->space = -2;
4409 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004410 } else if ((ctxt->sax != NULL) &&
4411 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004412 ctxt->sax->characters(ctxt->userData,
4413 tmp, nbchar);
4414 }
4415 }
4416 return;
4417 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004418
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004419get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004420 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004421 while (test_char_data[*in]) {
4422 in++;
4423 ccol++;
4424 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004425 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004426 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004427 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004428 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004429 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004430 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004431 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004432 }
4433 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004434 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004435 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Nick Wellnhofer52ceced2017-07-01 17:49:30 +02004436 ctxt->input->cur = in + 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004437 return;
4438 }
4439 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004440 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004441 goto get_more;
4442 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004443 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004444 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004445 if ((ctxt->sax != NULL) &&
4446 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004447 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004448 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004449 const xmlChar *tmp = ctxt->input->cur;
4450 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004451
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004452 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004453 if (ctxt->sax->ignorableWhitespace != NULL)
4454 ctxt->sax->ignorableWhitespace(ctxt->userData,
4455 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004456 } else {
4457 if (ctxt->sax->characters != NULL)
4458 ctxt->sax->characters(ctxt->userData,
4459 tmp, nbchar);
4460 if (*ctxt->space == -1)
4461 *ctxt->space = -2;
4462 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004463 line = ctxt->input->line;
4464 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004465 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004466 if (ctxt->sax->characters != NULL)
4467 ctxt->sax->characters(ctxt->userData,
4468 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004469 line = ctxt->input->line;
4470 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004471 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004472 /* something really bad happened in the SAX callback */
4473 if (ctxt->instate != XML_PARSER_CONTENT)
4474 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004475 }
4476 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004477 if (*in == 0xD) {
4478 in++;
4479 if (*in == 0xA) {
4480 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004481 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004482 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004483 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004484 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004485 in--;
4486 }
4487 if (*in == '<') {
4488 return;
4489 }
4490 if (*in == '&') {
4491 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004492 }
4493 SHRINK;
4494 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004495 if (ctxt->instate == XML_PARSER_EOF)
4496 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004497 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004498 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004499 nbchar = 0;
4500 }
Daniel Veillard50582112001-03-26 22:52:16 +00004501 ctxt->input->line = line;
4502 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004503 xmlParseCharDataComplex(ctxt, cdata);
4504}
4505
Daniel Veillard01c13b52002-12-10 15:19:08 +00004506/**
4507 * xmlParseCharDataComplex:
4508 * @ctxt: an XML parser context
4509 * @cdata: int indicating whether we are within a CDATA section
4510 *
4511 * parse a CharData section.this is the fallback function
4512 * of xmlParseCharData() when the parsing requires handling
4513 * of non-ASCII characters.
4514 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004515static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004516xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004517 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4518 int nbchar = 0;
4519 int cur, l;
4520 int count = 0;
4521
4522 SHRINK;
4523 GROW;
4524 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004525 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004526 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004527 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004528 if ((cur == ']') && (NXT(1) == ']') &&
4529 (NXT(2) == '>')) {
4530 if (cdata) break;
4531 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004532 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004533 }
4534 }
4535 COPY_BUF(l,buf,nbchar,cur);
4536 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004537 buf[nbchar] = 0;
4538
Owen Taylor3473f882001-02-23 17:55:21 +00004539 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004540 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004541 */
4542 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004543 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004544 if (ctxt->sax->ignorableWhitespace != NULL)
4545 ctxt->sax->ignorableWhitespace(ctxt->userData,
4546 buf, nbchar);
4547 } else {
4548 if (ctxt->sax->characters != NULL)
4549 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004550 if ((ctxt->sax->characters !=
4551 ctxt->sax->ignorableWhitespace) &&
4552 (*ctxt->space == -1))
4553 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004554 }
4555 }
4556 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004557 /* something really bad happened in the SAX callback */
4558 if (ctxt->instate != XML_PARSER_CONTENT)
4559 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004560 }
4561 count++;
4562 if (count > 50) {
4563 GROW;
4564 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004565 if (ctxt->instate == XML_PARSER_EOF)
4566 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004567 }
4568 NEXTL(l);
4569 cur = CUR_CHAR(l);
4570 }
4571 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004572 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004573 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004574 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004575 */
4576 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004577 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004578 if (ctxt->sax->ignorableWhitespace != NULL)
4579 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4580 } else {
4581 if (ctxt->sax->characters != NULL)
4582 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004583 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4584 (*ctxt->space == -1))
4585 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004586 }
4587 }
4588 }
Nick Wellnhofer52ceced2017-07-01 17:49:30 +02004589 if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004590 /* Generate the error and skip the offending character */
4591 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4592 "PCDATA invalid Char value %d\n",
4593 cur);
4594 NEXTL(l);
4595 }
Owen Taylor3473f882001-02-23 17:55:21 +00004596}
4597
4598/**
4599 * xmlParseExternalID:
4600 * @ctxt: an XML parser context
4601 * @publicID: a xmlChar** receiving PubidLiteral
4602 * @strict: indicate whether we should restrict parsing to only
4603 * production [75], see NOTE below
4604 *
4605 * Parse an External ID or a Public ID
4606 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004607 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004608 * 'PUBLIC' S PubidLiteral S SystemLiteral
4609 *
4610 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4611 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4612 *
4613 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4614 *
4615 * Returns the function returns SystemLiteral and in the second
4616 * case publicID receives PubidLiteral, is strict is off
4617 * it is possible to return NULL and have publicID set.
4618 */
4619
4620xmlChar *
4621xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4622 xmlChar *URI = NULL;
4623
4624 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004625
4626 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004627 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004628 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004629 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004630 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4631 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004632 }
Owen Taylor3473f882001-02-23 17:55:21 +00004633 URI = xmlParseSystemLiteral(ctxt);
4634 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004635 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004636 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004637 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004638 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004639 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004640 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004641 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004642 }
Owen Taylor3473f882001-02-23 17:55:21 +00004643 *publicID = xmlParsePubidLiteral(ctxt);
4644 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004645 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004646 }
4647 if (strict) {
4648 /*
4649 * We don't handle [83] so "S SystemLiteral" is required.
4650 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004651 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004652 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004653 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004654 }
4655 } else {
4656 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004657 * We handle [83] so we return immediately, if
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004658 * "S SystemLiteral" is not detected. We skip blanks if no
4659 * system literal was found, but this is harmless since we must
4660 * be at the end of a NotationDecl.
Owen Taylor3473f882001-02-23 17:55:21 +00004661 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004662 if (SKIP_BLANKS == 0) return(NULL);
4663 if ((CUR != '\'') && (CUR != '"')) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004664 }
Owen Taylor3473f882001-02-23 17:55:21 +00004665 URI = xmlParseSystemLiteral(ctxt);
4666 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004667 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004668 }
4669 }
4670 return(URI);
4671}
4672
4673/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004674 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004675 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004676 * @buf: the already parsed part of the buffer
4677 * @len: number of bytes filles in the buffer
4678 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004679 *
4680 * Skip an XML (SGML) comment <!-- .... -->
4681 * The spec says that "For compatibility, the string "--" (double-hyphen)
4682 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004683 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004684 *
4685 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4686 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004687static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004688xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4689 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004690 int q, ql;
4691 int r, rl;
4692 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004693 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004694 int inputid;
4695
4696 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004697
Owen Taylor3473f882001-02-23 17:55:21 +00004698 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004699 len = 0;
4700 size = XML_PARSER_BUFFER_SIZE;
4701 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4702 if (buf == NULL) {
4703 xmlErrMemory(ctxt, NULL);
4704 return;
4705 }
Owen Taylor3473f882001-02-23 17:55:21 +00004706 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004707 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004708 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004709 if (q == 0)
4710 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004711 if (!IS_CHAR(q)) {
4712 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4713 "xmlParseComment: invalid xmlChar value %d\n",
4714 q);
4715 xmlFree (buf);
4716 return;
4717 }
Owen Taylor3473f882001-02-23 17:55:21 +00004718 NEXTL(ql);
4719 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004720 if (r == 0)
4721 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004722 if (!IS_CHAR(r)) {
4723 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4724 "xmlParseComment: invalid xmlChar value %d\n",
4725 q);
4726 xmlFree (buf);
4727 return;
4728 }
Owen Taylor3473f882001-02-23 17:55:21 +00004729 NEXTL(rl);
4730 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004731 if (cur == 0)
4732 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004733 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004734 ((cur != '>') ||
4735 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004736 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004737 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004738 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004739 if ((len > XML_MAX_TEXT_LENGTH) &&
4740 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4741 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4742 "Comment too big found", NULL);
4743 xmlFree (buf);
4744 return;
4745 }
Owen Taylor3473f882001-02-23 17:55:21 +00004746 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004747 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004748 size_t new_size;
4749
4750 new_size = size * 2;
4751 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004752 if (new_buf == NULL) {
4753 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004754 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004755 return;
4756 }
William M. Bracka3215c72004-07-31 16:24:01 +00004757 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004758 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004759 }
4760 COPY_BUF(ql,buf,len,q);
4761 q = r;
4762 ql = rl;
4763 r = cur;
4764 rl = l;
4765
4766 count++;
4767 if (count > 50) {
4768 GROW;
4769 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004770 if (ctxt->instate == XML_PARSER_EOF) {
4771 xmlFree(buf);
4772 return;
4773 }
Owen Taylor3473f882001-02-23 17:55:21 +00004774 }
4775 NEXTL(l);
4776 cur = CUR_CHAR(l);
4777 if (cur == 0) {
4778 SHRINK;
4779 GROW;
4780 cur = CUR_CHAR(l);
4781 }
4782 }
4783 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004784 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004785 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004786 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004787 } else if (!IS_CHAR(cur)) {
4788 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4789 "xmlParseComment: invalid xmlChar value %d\n",
4790 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004791 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004792 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004793 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004794 "Comment doesn't start and stop in the same"
4795 " entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004796 }
4797 NEXT;
4798 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4799 (!ctxt->disableSAX))
4800 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004801 }
Daniel Veillardda629342007-08-01 07:49:06 +00004802 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004803 return;
4804not_terminated:
4805 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4806 "Comment not terminated\n", NULL);
4807 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004808 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004809}
Daniel Veillardda629342007-08-01 07:49:06 +00004810
Daniel Veillard4c778d82005-01-23 17:37:44 +00004811/**
4812 * xmlParseComment:
4813 * @ctxt: an XML parser context
4814 *
4815 * Skip an XML (SGML) comment <!-- .... -->
4816 * The spec says that "For compatibility, the string "--" (double-hyphen)
4817 * must not occur within comments. "
4818 *
4819 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4820 */
4821void
4822xmlParseComment(xmlParserCtxtPtr ctxt) {
4823 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004824 size_t size = XML_PARSER_BUFFER_SIZE;
4825 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004826 xmlParserInputState state;
4827 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004828 size_t nbchar = 0;
4829 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004830 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004831
4832 /*
4833 * Check that there is a comment right here.
4834 */
4835 if ((RAW != '<') || (NXT(1) != '!') ||
4836 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004837 state = ctxt->instate;
4838 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004839 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004840 SKIP(4);
4841 SHRINK;
4842 GROW;
4843
4844 /*
4845 * Accelerated common case where input don't need to be
4846 * modified before passing it to the handler.
4847 */
4848 in = ctxt->input->cur;
4849 do {
4850 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004851 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004852 ctxt->input->line++; ctxt->input->col = 1;
4853 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004854 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004855 }
4856get_more:
4857 ccol = ctxt->input->col;
4858 while (((*in > '-') && (*in <= 0x7F)) ||
4859 ((*in >= 0x20) && (*in < '-')) ||
4860 (*in == 0x09)) {
4861 in++;
4862 ccol++;
4863 }
4864 ctxt->input->col = ccol;
4865 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004866 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004867 ctxt->input->line++; ctxt->input->col = 1;
4868 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004869 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004870 goto get_more;
4871 }
4872 nbchar = in - ctxt->input->cur;
4873 /*
4874 * save current set of data
4875 */
4876 if (nbchar > 0) {
4877 if ((ctxt->sax != NULL) &&
4878 (ctxt->sax->comment != NULL)) {
4879 if (buf == NULL) {
4880 if ((*in == '-') && (in[1] == '-'))
4881 size = nbchar + 1;
4882 else
4883 size = XML_PARSER_BUFFER_SIZE + nbchar;
4884 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4885 if (buf == NULL) {
4886 xmlErrMemory(ctxt, NULL);
4887 ctxt->instate = state;
4888 return;
4889 }
4890 len = 0;
4891 } else if (len + nbchar + 1 >= size) {
4892 xmlChar *new_buf;
4893 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4894 new_buf = (xmlChar *) xmlRealloc(buf,
4895 size * sizeof(xmlChar));
4896 if (new_buf == NULL) {
4897 xmlFree (buf);
4898 xmlErrMemory(ctxt, NULL);
4899 ctxt->instate = state;
4900 return;
4901 }
4902 buf = new_buf;
4903 }
4904 memcpy(&buf[len], ctxt->input->cur, nbchar);
4905 len += nbchar;
4906 buf[len] = 0;
4907 }
4908 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004909 if ((len > XML_MAX_TEXT_LENGTH) &&
4910 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4911 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4912 "Comment too big found", NULL);
4913 xmlFree (buf);
4914 return;
4915 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004916 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004917 if (*in == 0xA) {
4918 in++;
4919 ctxt->input->line++; ctxt->input->col = 1;
4920 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004921 if (*in == 0xD) {
4922 in++;
4923 if (*in == 0xA) {
4924 ctxt->input->cur = in;
4925 in++;
4926 ctxt->input->line++; ctxt->input->col = 1;
4927 continue; /* while */
4928 }
4929 in--;
4930 }
4931 SHRINK;
4932 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004933 if (ctxt->instate == XML_PARSER_EOF) {
4934 xmlFree(buf);
4935 return;
4936 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004937 in = ctxt->input->cur;
4938 if (*in == '-') {
4939 if (in[1] == '-') {
4940 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004941 if (ctxt->input->id != inputid) {
4942 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004943 "comment doesn't start and stop in the"
4944 " same entity\n");
Daniel Veillard051d52c2008-07-29 16:44:59 +00004945 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004946 SKIP(3);
4947 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4948 (!ctxt->disableSAX)) {
4949 if (buf != NULL)
4950 ctxt->sax->comment(ctxt->userData, buf);
4951 else
4952 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4953 }
4954 if (buf != NULL)
4955 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08004956 if (ctxt->instate != XML_PARSER_EOF)
4957 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004958 return;
4959 }
Bryan Henderson8658d272012-05-08 16:39:05 +08004960 if (buf != NULL) {
4961 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4962 "Double hyphen within comment: "
4963 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00004964 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08004965 } else
4966 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4967 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004968 in++;
4969 ctxt->input->col++;
4970 }
4971 in++;
4972 ctxt->input->col++;
4973 goto get_more;
4974 }
4975 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4976 xmlParseCommentComplex(ctxt, buf, len, size);
4977 ctxt->instate = state;
4978 return;
4979}
4980
Owen Taylor3473f882001-02-23 17:55:21 +00004981
4982/**
4983 * xmlParsePITarget:
4984 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004985 *
Owen Taylor3473f882001-02-23 17:55:21 +00004986 * parse the name of a PI
4987 *
4988 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4989 *
4990 * Returns the PITarget name or NULL
4991 */
4992
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004993const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004994xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004995 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004996
4997 name = xmlParseName(ctxt);
4998 if ((name != NULL) &&
4999 ((name[0] == 'x') || (name[0] == 'X')) &&
5000 ((name[1] == 'm') || (name[1] == 'M')) &&
5001 ((name[2] == 'l') || (name[2] == 'L'))) {
5002 int i;
5003 if ((name[0] == 'x') && (name[1] == 'm') &&
5004 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005005 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005006 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005007 return(name);
5008 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005009 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005010 return(name);
5011 }
5012 for (i = 0;;i++) {
5013 if (xmlW3CPIs[i] == NULL) break;
5014 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5015 return(name);
5016 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005017 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5018 "xmlParsePITarget: invalid name prefix 'xml'\n",
5019 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005020 }
Daniel Veillard37334572008-07-31 08:20:02 +00005021 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005022 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005023 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005024 }
Owen Taylor3473f882001-02-23 17:55:21 +00005025 return(name);
5026}
5027
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must look like: catalog = "URL" or catalog = 'URL', with
 * optional blanks around each part and nothing after the closing
 * quote. Malformed input produces an XML_WAR_CATALOG_PI warning,
 * except for a missing '=' after the keyword, which is ignored
 * silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* the "catalog" keyword */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;
    p += 7;

    /* the '=' sign */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=') {
        return;
    }
    p++;

    /* the quoted value */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5089
Owen Taylor3473f882001-02-23 17:55:21 +00005090/**
5091 * xmlParsePI:
5092 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005093 *
Owen Taylor3473f882001-02-23 17:55:21 +00005094 * parse an XML Processing Instruction.
5095 *
5096 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5097 *
5098 * The processing is transfered to SAX once parsed.
5099 */
5100
5101void
5102xmlParsePI(xmlParserCtxtPtr ctxt) {
5103 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005104 size_t len = 0;
5105 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005106 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005107 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005108 xmlParserInputState state;
5109 int count = 0;
5110
5111 if ((RAW == '<') && (NXT(1) == '?')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005112 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005113 state = ctxt->instate;
5114 ctxt->instate = XML_PARSER_PI;
5115 /*
5116 * this is a Processing Instruction.
5117 */
5118 SKIP(2);
5119 SHRINK;
5120
5121 /*
5122 * Parse the target name and check for special support like
5123 * namespace.
5124 */
5125 target = xmlParsePITarget(ctxt);
5126 if (target != NULL) {
5127 if ((RAW == '?') && (NXT(1) == '>')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005128 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005129 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005130 "PI declaration doesn't start and stop in"
5131 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005132 }
5133 SKIP(2);
5134
5135 /*
5136 * SAX: PI detected.
5137 */
5138 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5139 (ctxt->sax->processingInstruction != NULL))
5140 ctxt->sax->processingInstruction(ctxt->userData,
5141 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005142 if (ctxt->instate != XML_PARSER_EOF)
5143 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005144 return;
5145 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005146 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005147 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005148 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005149 ctxt->instate = state;
5150 return;
5151 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005152 if (SKIP_BLANKS == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005153 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5154 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005155 }
Owen Taylor3473f882001-02-23 17:55:21 +00005156 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005157 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005158 ((cur != '?') || (NXT(1) != '>'))) {
5159 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005160 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005161 size_t new_size = size * 2;
5162 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005163 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005164 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005165 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005166 ctxt->instate = state;
5167 return;
5168 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005169 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005170 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005171 }
5172 count++;
5173 if (count > 50) {
5174 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005175 if (ctxt->instate == XML_PARSER_EOF) {
5176 xmlFree(buf);
5177 return;
5178 }
Owen Taylor3473f882001-02-23 17:55:21 +00005179 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005180 if ((len > XML_MAX_TEXT_LENGTH) &&
5181 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5182 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5183 "PI %s too big found", target);
5184 xmlFree(buf);
5185 ctxt->instate = state;
5186 return;
5187 }
Owen Taylor3473f882001-02-23 17:55:21 +00005188 }
5189 COPY_BUF(l,buf,len,cur);
5190 NEXTL(l);
5191 cur = CUR_CHAR(l);
5192 if (cur == 0) {
5193 SHRINK;
5194 GROW;
5195 cur = CUR_CHAR(l);
5196 }
5197 }
Daniel Veillard51304812012-07-19 20:34:26 +08005198 if ((len > XML_MAX_TEXT_LENGTH) &&
5199 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5200 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5201 "PI %s too big found", target);
5202 xmlFree(buf);
5203 ctxt->instate = state;
5204 return;
5205 }
Owen Taylor3473f882001-02-23 17:55:21 +00005206 buf[len] = 0;
5207 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005208 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5209 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005210 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005211 if (inputid != ctxt->input->id) {
5212 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5213 "PI declaration doesn't start and stop in"
5214 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005215 }
5216 SKIP(2);
5217
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005218#ifdef LIBXML_CATALOG_ENABLED
5219 if (((state == XML_PARSER_MISC) ||
5220 (state == XML_PARSER_START)) &&
5221 (xmlStrEqual(target, XML_CATALOG_PI))) {
5222 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5223 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5224 (allow == XML_CATA_ALLOW_ALL))
5225 xmlParseCatalogPI(ctxt, buf);
5226 }
5227#endif
5228
5229
Owen Taylor3473f882001-02-23 17:55:21 +00005230 /*
5231 * SAX: PI detected.
5232 */
5233 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5234 (ctxt->sax->processingInstruction != NULL))
5235 ctxt->sax->processingInstruction(ctxt->userData,
5236 target, buf);
5237 }
5238 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005239 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005240 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005241 }
Chris Evans77404b82011-12-14 16:18:25 +08005242 if (ctxt->instate != XML_PARSER_EOF)
5243 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005244 }
5245}
5246
5247/**
5248 * xmlParseNotationDecl:
5249 * @ctxt: an XML parser context
5250 *
5251 * parse a notation declaration
5252 *
5253 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5254 *
5255 * Hence there is actually 3 choices:
5256 * 'PUBLIC' S PubidLiteral
5257 * 'PUBLIC' S PubidLiteral S SystemLiteral
5258 * and 'SYSTEM' S SystemLiteral
5259 *
5260 * See the NOTE on xmlParseExternalID().
5261 */
5262
5263void
5264xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005265 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005266 xmlChar *Pubid;
5267 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005268
Daniel Veillarda07050d2003-10-19 14:46:32 +00005269 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005270 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005271 SHRINK;
5272 SKIP(10);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005273 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005274 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5275 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005276 return;
5277 }
Owen Taylor3473f882001-02-23 17:55:21 +00005278
Daniel Veillard76d66f42001-05-16 21:05:17 +00005279 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005280 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005281 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005282 return;
5283 }
Daniel Veillard37334572008-07-31 08:20:02 +00005284 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005285 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005286 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005287 name, NULL, NULL);
5288 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005289 if (SKIP_BLANKS == 0) {
5290 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5291 "Space required after the NOTATION name'\n");
5292 return;
5293 }
Owen Taylor3473f882001-02-23 17:55:21 +00005294
5295 /*
5296 * Parse the IDs.
5297 */
5298 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5299 SKIP_BLANKS;
5300
5301 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005302 if (inputid != ctxt->input->id) {
5303 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5304 "Notation declaration doesn't start and stop"
5305 " in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005306 }
5307 NEXT;
5308 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5309 (ctxt->sax->notationDecl != NULL))
5310 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5311 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005312 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005313 }
Owen Taylor3473f882001-02-23 17:55:21 +00005314 if (Systemid != NULL) xmlFree(Systemid);
5315 if (Pubid != NULL) xmlFree(Pubid);
5316 }
5317}
5318
5319/**
5320 * xmlParseEntityDecl:
5321 * @ctxt: an XML parser context
5322 *
5323 * parse <!ENTITY declarations
5324 *
5325 * [70] EntityDecl ::= GEDecl | PEDecl
5326 *
5327 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5328 *
5329 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5330 *
5331 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5332 *
5333 * [74] PEDef ::= EntityValue | ExternalID
5334 *
5335 * [76] NDataDecl ::= S 'NDATA' S Name
5336 *
5337 * [ VC: Notation Declared ]
5338 * The Name must match the declared name of a notation.
5339 */
5340
5341void
5342xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005343 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005344 xmlChar *value = NULL;
5345 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005346 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005347 int isParameter = 0;
5348 xmlChar *orig = NULL;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005349
Daniel Veillard4c778d82005-01-23 17:37:44 +00005350 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005351 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005352 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005353 SHRINK;
5354 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005355 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005356 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5357 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005358 }
Owen Taylor3473f882001-02-23 17:55:21 +00005359
5360 if (RAW == '%') {
5361 NEXT;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005362 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005363 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
David Kilzer4472c3a2016-05-13 15:13:17 +08005364 "Space required after '%%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005365 }
Owen Taylor3473f882001-02-23 17:55:21 +00005366 isParameter = 1;
5367 }
5368
Daniel Veillard76d66f42001-05-16 21:05:17 +00005369 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005370 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005371 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5372 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005373 return;
5374 }
Daniel Veillard37334572008-07-31 08:20:02 +00005375 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005376 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005377 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005378 name, NULL, NULL);
5379 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005380 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005381 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5382 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005383 }
Owen Taylor3473f882001-02-23 17:55:21 +00005384
Daniel Veillardf5582f12002-06-11 10:08:16 +00005385 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005386 /*
5387 * handle the various case of definitions...
5388 */
5389 if (isParameter) {
5390 if ((RAW == '"') || (RAW == '\'')) {
5391 value = xmlParseEntityValue(ctxt, &orig);
5392 if (value) {
5393 if ((ctxt->sax != NULL) &&
5394 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5395 ctxt->sax->entityDecl(ctxt->userData, name,
5396 XML_INTERNAL_PARAMETER_ENTITY,
5397 NULL, NULL, value);
5398 }
5399 } else {
5400 URI = xmlParseExternalID(ctxt, &literal, 1);
5401 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005402 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005403 }
5404 if (URI) {
5405 xmlURIPtr uri;
5406
5407 uri = xmlParseURI((const char *) URI);
5408 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005409 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5410 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005411 /*
5412 * This really ought to be a well formedness error
5413 * but the XML Core WG decided otherwise c.f. issue
5414 * E26 of the XML erratas.
5415 */
Owen Taylor3473f882001-02-23 17:55:21 +00005416 } else {
5417 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005418 /*
5419 * Okay this is foolish to block those but not
5420 * invalid URIs.
5421 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005422 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005423 } else {
5424 if ((ctxt->sax != NULL) &&
5425 (!ctxt->disableSAX) &&
5426 (ctxt->sax->entityDecl != NULL))
5427 ctxt->sax->entityDecl(ctxt->userData, name,
5428 XML_EXTERNAL_PARAMETER_ENTITY,
5429 literal, URI, NULL);
5430 }
5431 xmlFreeURI(uri);
5432 }
5433 }
5434 }
5435 } else {
5436 if ((RAW == '"') || (RAW == '\'')) {
5437 value = xmlParseEntityValue(ctxt, &orig);
5438 if ((ctxt->sax != NULL) &&
5439 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5440 ctxt->sax->entityDecl(ctxt->userData, name,
5441 XML_INTERNAL_GENERAL_ENTITY,
5442 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005443 /*
5444 * For expat compatibility in SAX mode.
5445 */
5446 if ((ctxt->myDoc == NULL) ||
5447 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5448 if (ctxt->myDoc == NULL) {
5449 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005450 if (ctxt->myDoc == NULL) {
5451 xmlErrMemory(ctxt, "New Doc failed");
5452 return;
5453 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005454 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005455 }
5456 if (ctxt->myDoc->intSubset == NULL)
5457 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5458 BAD_CAST "fake", NULL, NULL);
5459
Daniel Veillard1af9a412003-08-20 22:54:39 +00005460 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5461 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005462 }
Owen Taylor3473f882001-02-23 17:55:21 +00005463 } else {
5464 URI = xmlParseExternalID(ctxt, &literal, 1);
5465 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005466 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005467 }
5468 if (URI) {
5469 xmlURIPtr uri;
5470
5471 uri = xmlParseURI((const char *)URI);
5472 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005473 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5474 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005475 /*
5476 * This really ought to be a well formedness error
5477 * but the XML Core WG decided otherwise c.f. issue
5478 * E26 of the XML erratas.
5479 */
Owen Taylor3473f882001-02-23 17:55:21 +00005480 } else {
5481 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005482 /*
5483 * Okay this is foolish to block those but not
5484 * invalid URIs.
5485 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005486 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005487 }
5488 xmlFreeURI(uri);
5489 }
5490 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005491 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005492 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5493 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005494 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005495 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005496 SKIP(5);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005497 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005498 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5499 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005500 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005501 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005502 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5503 (ctxt->sax->unparsedEntityDecl != NULL))
5504 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5505 literal, URI, ndata);
5506 } else {
5507 if ((ctxt->sax != NULL) &&
5508 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5509 ctxt->sax->entityDecl(ctxt->userData, name,
5510 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5511 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005512 /*
5513 * For expat compatibility in SAX mode.
5514 * assuming the entity repalcement was asked for
5515 */
5516 if ((ctxt->replaceEntities != 0) &&
5517 ((ctxt->myDoc == NULL) ||
5518 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5519 if (ctxt->myDoc == NULL) {
5520 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005521 if (ctxt->myDoc == NULL) {
5522 xmlErrMemory(ctxt, "New Doc failed");
5523 return;
5524 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005525 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005526 }
5527
5528 if (ctxt->myDoc->intSubset == NULL)
5529 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5530 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005531 xmlSAX2EntityDecl(ctxt, name,
5532 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5533 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005534 }
Owen Taylor3473f882001-02-23 17:55:21 +00005535 }
5536 }
5537 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005538 if (ctxt->instate == XML_PARSER_EOF)
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005539 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00005540 SKIP_BLANKS;
5541 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005542 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005543 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005544 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005545 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005546 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005547 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005548 "Entity declaration doesn't start and stop in"
5549 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005550 }
5551 NEXT;
5552 }
5553 if (orig != NULL) {
5554 /*
5555 * Ugly mechanism to save the raw entity value.
5556 */
5557 xmlEntityPtr cur = NULL;
5558
5559 if (isParameter) {
5560 if ((ctxt->sax != NULL) &&
5561 (ctxt->sax->getParameterEntity != NULL))
5562 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5563 } else {
5564 if ((ctxt->sax != NULL) &&
5565 (ctxt->sax->getEntity != NULL))
5566 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005567 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005568 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005569 }
Owen Taylor3473f882001-02-23 17:55:21 +00005570 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005571 if ((cur != NULL) && (cur->orig == NULL)) {
5572 cur->orig = orig;
5573 orig = NULL;
5574 }
Owen Taylor3473f882001-02-23 17:55:21 +00005575 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005576
5577done:
Owen Taylor3473f882001-02-23 17:55:21 +00005578 if (value != NULL) xmlFree(value);
5579 if (URI != NULL) xmlFree(URI);
5580 if (literal != NULL) xmlFree(literal);
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005581 if (orig != NULL) xmlFree(orig);
Owen Taylor3473f882001-02-23 17:55:21 +00005582 }
5583}
5584
5585/**
5586 * xmlParseDefaultDecl:
5587 * @ctxt: an XML parser context
5588 * @value: Receive a possible fixed default value for the attribute
5589 *
5590 * Parse an attribute default declaration
5591 *
5592 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5593 *
5594 * [ VC: Required Attribute ]
5595 * if the default declaration is the keyword #REQUIRED, then the
5596 * attribute must be specified for all elements of the type in the
5597 * attribute-list declaration.
5598 *
5599 * [ VC: Attribute Default Legal ]
5600 * The declared default value must meet the lexical constraints of
5601 * the declared attribute type c.f. xmlValidateAttributeDecl()
5602 *
5603 * [ VC: Fixed Attribute Default ]
5604 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005605 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005606 *
5607 * [ WFC: No < in Attribute Values ]
5608 * handled in xmlParseAttValue()
5609 *
5610 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005611 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005612 */
5613
5614int
5615xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5616 int val;
5617 xmlChar *ret;
5618
5619 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005620 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005621 SKIP(9);
5622 return(XML_ATTRIBUTE_REQUIRED);
5623 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005624 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005625 SKIP(8);
5626 return(XML_ATTRIBUTE_IMPLIED);
5627 }
5628 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005629 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005630 SKIP(6);
5631 val = XML_ATTRIBUTE_FIXED;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005632 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005633 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5634 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005635 }
Owen Taylor3473f882001-02-23 17:55:21 +00005636 }
5637 ret = xmlParseAttValue(ctxt);
5638 ctxt->instate = XML_PARSER_DTD;
5639 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005640 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005641 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005642 } else
5643 *value = ret;
5644 return(val);
5645}
5646
5647/**
5648 * xmlParseNotationType:
5649 * @ctxt: an XML parser context
5650 *
5651 * parse an Notation attribute type.
5652 *
5653 * Note: the leading 'NOTATION' S part has already being parsed...
5654 *
5655 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5656 *
5657 * [ VC: Notation Attributes ]
5658 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005659 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005660 *
5661 * Returns: the notation attribute tree built while parsing
5662 */
5663
5664xmlEnumerationPtr
5665xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005666 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005667 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005668
5669 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005670 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005671 return(NULL);
5672 }
5673 SHRINK;
5674 do {
5675 NEXT;
5676 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005677 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005678 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005679 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5680 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005681 xmlFreeEnumeration(ret);
5682 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005683 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005684 tmp = ret;
5685 while (tmp != NULL) {
5686 if (xmlStrEqual(name, tmp->name)) {
5687 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5688 "standalone: attribute notation value token %s duplicated\n",
5689 name, NULL);
5690 if (!xmlDictOwns(ctxt->dict, name))
5691 xmlFree((xmlChar *) name);
5692 break;
5693 }
5694 tmp = tmp->next;
5695 }
5696 if (tmp == NULL) {
5697 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005698 if (cur == NULL) {
5699 xmlFreeEnumeration(ret);
5700 return(NULL);
5701 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005702 if (last == NULL) ret = last = cur;
5703 else {
5704 last->next = cur;
5705 last = cur;
5706 }
Owen Taylor3473f882001-02-23 17:55:21 +00005707 }
5708 SKIP_BLANKS;
5709 } while (RAW == '|');
5710 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005711 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005712 xmlFreeEnumeration(ret);
5713 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005714 }
5715 NEXT;
5716 return(ret);
5717}
5718
5719/**
5720 * xmlParseEnumerationType:
5721 * @ctxt: an XML parser context
5722 *
5723 * parse an Enumeration attribute type.
5724 *
5725 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5726 *
5727 * [ VC: Enumeration ]
5728 * Values of this type must match one of the Nmtoken tokens in
5729 * the declaration
5730 *
5731 * Returns: the enumeration attribute tree built while parsing
5732 */
5733
5734xmlEnumerationPtr
5735xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5736 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005737 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005738
5739 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005740 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005741 return(NULL);
5742 }
5743 SHRINK;
5744 do {
5745 NEXT;
5746 SKIP_BLANKS;
5747 name = xmlParseNmtoken(ctxt);
5748 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005749 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005750 return(ret);
5751 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005752 tmp = ret;
5753 while (tmp != NULL) {
5754 if (xmlStrEqual(name, tmp->name)) {
5755 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5756 "standalone: attribute enumeration value token %s duplicated\n",
5757 name, NULL);
5758 if (!xmlDictOwns(ctxt->dict, name))
5759 xmlFree(name);
5760 break;
5761 }
5762 tmp = tmp->next;
5763 }
5764 if (tmp == NULL) {
5765 cur = xmlCreateEnumeration(name);
5766 if (!xmlDictOwns(ctxt->dict, name))
5767 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005768 if (cur == NULL) {
5769 xmlFreeEnumeration(ret);
5770 return(NULL);
5771 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005772 if (last == NULL) ret = last = cur;
5773 else {
5774 last->next = cur;
5775 last = cur;
5776 }
Owen Taylor3473f882001-02-23 17:55:21 +00005777 }
5778 SKIP_BLANKS;
5779 } while (RAW == '|');
5780 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005781 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005782 return(ret);
5783 }
5784 NEXT;
5785 return(ret);
5786}
5787
5788/**
5789 * xmlParseEnumeratedType:
5790 * @ctxt: an XML parser context
5791 * @tree: the enumeration tree built while parsing
5792 *
5793 * parse an Enumerated attribute type.
5794 *
5795 * [57] EnumeratedType ::= NotationType | Enumeration
5796 *
5797 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798 *
5799 *
5800 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5801 */
5802
5803int
5804xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005805 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005806 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005807 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005808 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5809 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005810 return(0);
5811 }
Owen Taylor3473f882001-02-23 17:55:21 +00005812 *tree = xmlParseNotationType(ctxt);
5813 if (*tree == NULL) return(0);
5814 return(XML_ATTRIBUTE_NOTATION);
5815 }
5816 *tree = xmlParseEnumerationType(ctxt);
5817 if (*tree == NULL) return(0);
5818 return(XML_ATTRIBUTE_ENUMERATION);
5819}
5820
5821/**
5822 * xmlParseAttributeType:
5823 * @ctxt: an XML parser context
5824 * @tree: the enumeration tree built while parsing
5825 *
5826 * parse the Attribute list def for an element
5827 *
5828 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5829 *
5830 * [55] StringType ::= 'CDATA'
5831 *
5832 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5833 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5834 *
5835 * Validity constraints for attribute values syntax are checked in
5836 * xmlValidateAttributeValue()
5837 *
5838 * [ VC: ID ]
5839 * Values of type ID must match the Name production. A name must not
5840 * appear more than once in an XML document as a value of this type;
5841 * i.e., ID values must uniquely identify the elements which bear them.
5842 *
5843 * [ VC: One ID per Element Type ]
5844 * No element type may have more than one ID attribute specified.
5845 *
5846 * [ VC: ID Attribute Default ]
5847 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5848 *
5849 * [ VC: IDREF ]
5850 * Values of type IDREF must match the Name production, and values
5851 * of type IDREFS must match Names; each IDREF Name must match the value
5852 * of an ID attribute on some element in the XML document; i.e. IDREF
5853 * values must match the value of some ID attribute.
5854 *
5855 * [ VC: Entity Name ]
5856 * Values of type ENTITY must match the Name production, values
5857 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005858 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005859 *
5860 * [ VC: Name Token ]
5861 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005862 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005863 *
5864 * Returns the attribute type
5865 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005866int
Owen Taylor3473f882001-02-23 17:55:21 +00005867xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5868 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005869 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005870 SKIP(5);
5871 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005872 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005873 SKIP(6);
5874 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005875 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005876 SKIP(5);
5877 return(XML_ATTRIBUTE_IDREF);
5878 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5879 SKIP(2);
5880 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005881 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005882 SKIP(6);
5883 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005884 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005885 SKIP(8);
5886 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005887 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005888 SKIP(8);
5889 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005890 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005891 SKIP(7);
5892 return(XML_ATTRIBUTE_NMTOKEN);
5893 }
5894 return(xmlParseEnumeratedType(ctxt, tree));
5895}
5896
5897/**
5898 * xmlParseAttributeListDecl:
5899 * @ctxt: an XML parser context
5900 *
5901 * : parse the Attribute list def for an element
5902 *
5903 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5904 *
5905 * [53] AttDef ::= S Name S AttType S DefaultDecl
5906 *
5907 */
5908void
5909xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005910 const xmlChar *elemName;
5911 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005912 xmlEnumerationPtr tree;
5913
Daniel Veillarda07050d2003-10-19 14:46:32 +00005914 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005915 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005916
5917 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005918 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005919 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005920 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005921 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005922 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005923 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005924 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5925 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005926 return;
5927 }
5928 SKIP_BLANKS;
5929 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005930 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005931 int type;
5932 int def;
5933 xmlChar *defaultValue = NULL;
5934
5935 GROW;
5936 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005937 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005938 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005939 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5940 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005941 break;
5942 }
5943 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005944 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005945 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005946 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005947 break;
5948 }
Owen Taylor3473f882001-02-23 17:55:21 +00005949
5950 type = xmlParseAttributeType(ctxt, &tree);
5951 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005952 break;
5953 }
5954
5955 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005956 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005957 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5958 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005959 if (tree != NULL)
5960 xmlFreeEnumeration(tree);
5961 break;
5962 }
Owen Taylor3473f882001-02-23 17:55:21 +00005963
5964 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5965 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005966 if (defaultValue != NULL)
5967 xmlFree(defaultValue);
5968 if (tree != NULL)
5969 xmlFreeEnumeration(tree);
5970 break;
5971 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005972 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5973 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005974
5975 GROW;
5976 if (RAW != '>') {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005977 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005978 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005979 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005980 if (defaultValue != NULL)
5981 xmlFree(defaultValue);
5982 if (tree != NULL)
5983 xmlFreeEnumeration(tree);
5984 break;
5985 }
Owen Taylor3473f882001-02-23 17:55:21 +00005986 }
Owen Taylor3473f882001-02-23 17:55:21 +00005987 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5988 (ctxt->sax->attributeDecl != NULL))
5989 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5990 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005991 else if (tree != NULL)
5992 xmlFreeEnumeration(tree);
5993
5994 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005995 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00005996 (def != XML_ATTRIBUTE_REQUIRED)) {
5997 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5998 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005999 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006000 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6001 }
Owen Taylor3473f882001-02-23 17:55:21 +00006002 if (defaultValue != NULL)
6003 xmlFree(defaultValue);
6004 GROW;
6005 }
6006 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006007 if (inputid != ctxt->input->id) {
6008 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6009 "Attribute list declaration doesn't start and"
6010 " stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006011 }
6012 NEXT;
6013 }
Owen Taylor3473f882001-02-23 17:55:21 +00006014 }
6015}
6016
6017/**
6018 * xmlParseElementMixedContentDecl:
6019 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006020 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006021 *
6022 * parse the declaration for a Mixed Element content
6023 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006024 *
Owen Taylor3473f882001-02-23 17:55:21 +00006025 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6026 * '(' S? '#PCDATA' S? ')'
6027 *
6028 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6029 *
6030 * [ VC: No Duplicate Types ]
6031 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006032 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006033 *
6034 * returns: the list of the xmlElementContentPtr describing the element choices
6035 */
6036xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006037xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006038 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006039 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006040
6041 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006042 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006043 SKIP(7);
6044 SKIP_BLANKS;
6045 SHRINK;
6046 if (RAW == ')') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006047 if (ctxt->input->id != inputchk) {
6048 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6049 "Element content declaration doesn't start and"
6050 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006051 }
Owen Taylor3473f882001-02-23 17:55:21 +00006052 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006053 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006054 if (ret == NULL)
6055 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006056 if (RAW == '*') {
6057 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6058 NEXT;
6059 }
6060 return(ret);
6061 }
6062 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006063 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006064 if (ret == NULL) return(NULL);
6065 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006066 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006067 NEXT;
6068 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006069 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006070 if (ret == NULL) return(NULL);
6071 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006072 if (cur != NULL)
6073 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006074 cur = ret;
6075 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006076 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006077 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006078 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006079 if (n->c1 != NULL)
6080 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006081 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006082 if (n != NULL)
6083 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006084 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006085 }
6086 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006087 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006088 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006089 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006090 "xmlParseElementMixedContentDecl : Name expected\n");
Nick Wellnhofer8627e4e2017-05-23 18:11:08 +02006091 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006092 return(NULL);
6093 }
6094 SKIP_BLANKS;
6095 GROW;
6096 }
6097 if ((RAW == ')') && (NXT(1) == '*')) {
6098 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006099 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006100 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006101 if (cur->c2 != NULL)
6102 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006103 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006104 if (ret != NULL)
6105 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006106 if (ctxt->input->id != inputchk) {
6107 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6108 "Element content declaration doesn't start and"
6109 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006110 }
Owen Taylor3473f882001-02-23 17:55:21 +00006111 SKIP(2);
6112 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006113 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006114 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006115 return(NULL);
6116 }
6117
6118 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006119 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006120 }
6121 return(ret);
6122}
6123
6124/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006125 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006126 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006127 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006128 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006129 *
6130 * parse the declaration for a Mixed Element content
6131 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006132 *
Owen Taylor3473f882001-02-23 17:55:21 +00006133 *
6134 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6135 *
6136 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6137 *
6138 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6139 *
6140 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6141 *
6142 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6143 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006144 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006145 * opening or closing parentheses in a choice, seq, or Mixed
6146 * construct is contained in the replacement text for a parameter
6147 * entity, both must be contained in the same replacement text. For
6148 * interoperability, if a parameter-entity reference appears in a
6149 * choice, seq, or Mixed construct, its replacement text should not
6150 * be empty, and neither the first nor last non-blank character of
6151 * the replacement text should be a connector (| or ,).
6152 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006153 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006154 * hierarchy.
6155 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006156static xmlElementContentPtr
6157xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6158 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006159 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006160 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006161 xmlChar type = 0;
6162
Daniel Veillard489f9672009-08-10 16:49:30 +02006163 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6164 (depth > 2048)) {
6165 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6166"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6167 depth);
6168 return(NULL);
6169 }
Owen Taylor3473f882001-02-23 17:55:21 +00006170 SKIP_BLANKS;
6171 GROW;
6172 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006173 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006174
Owen Taylor3473f882001-02-23 17:55:21 +00006175 /* Recurse on first child */
6176 NEXT;
6177 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006178 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6179 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006180 SKIP_BLANKS;
6181 GROW;
6182 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006183 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006184 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006185 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006186 return(NULL);
6187 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006188 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006189 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006190 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006191 return(NULL);
6192 }
Owen Taylor3473f882001-02-23 17:55:21 +00006193 GROW;
6194 if (RAW == '?') {
6195 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6196 NEXT;
6197 } else if (RAW == '*') {
6198 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6199 NEXT;
6200 } else if (RAW == '+') {
6201 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6202 NEXT;
6203 } else {
6204 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6205 }
Owen Taylor3473f882001-02-23 17:55:21 +00006206 GROW;
6207 }
6208 SKIP_BLANKS;
6209 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006210 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006211 /*
6212 * Each loop we parse one separator and one element.
6213 */
6214 if (RAW == ',') {
6215 if (type == 0) type = CUR;
6216
6217 /*
6218 * Detect "Name | Name , Name" error
6219 */
6220 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006221 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006222 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006223 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006224 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006225 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006226 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006227 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006228 return(NULL);
6229 }
6230 NEXT;
6231
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006232 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006233 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006234 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006235 xmlFreeDocElementContent(ctxt->myDoc, last);
6236 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006237 return(NULL);
6238 }
6239 if (last == NULL) {
6240 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006241 if (ret != NULL)
6242 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006243 ret = cur = op;
6244 } else {
6245 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006246 if (op != NULL)
6247 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006248 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006249 if (last != NULL)
6250 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006251 cur =op;
6252 last = NULL;
6253 }
6254 } else if (RAW == '|') {
6255 if (type == 0) type = CUR;
6256
6257 /*
6258 * Detect "Name , Name | Name" error
6259 */
6260 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006261 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006262 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006263 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006264 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006265 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006266 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006267 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006268 return(NULL);
6269 }
6270 NEXT;
6271
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006272 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006273 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006274 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006275 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006276 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006277 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006278 return(NULL);
6279 }
6280 if (last == NULL) {
6281 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006282 if (ret != NULL)
6283 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006284 ret = cur = op;
6285 } else {
6286 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006287 if (op != NULL)
6288 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006289 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006290 if (last != NULL)
6291 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006292 cur =op;
6293 last = NULL;
6294 }
6295 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006296 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006297 if ((last != NULL) && (last != ret))
6298 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006299 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006300 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006301 return(NULL);
6302 }
6303 GROW;
6304 SKIP_BLANKS;
6305 GROW;
6306 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006307 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006308 /* Recurse on second child */
6309 NEXT;
6310 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006311 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6312 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006313 SKIP_BLANKS;
6314 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006315 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006316 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006317 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006318 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006319 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006320 return(NULL);
6321 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006322 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006323 if (last == NULL) {
6324 if (ret != NULL)
6325 xmlFreeDocElementContent(ctxt->myDoc, ret);
6326 return(NULL);
6327 }
Owen Taylor3473f882001-02-23 17:55:21 +00006328 if (RAW == '?') {
6329 last->ocur = XML_ELEMENT_CONTENT_OPT;
6330 NEXT;
6331 } else if (RAW == '*') {
6332 last->ocur = XML_ELEMENT_CONTENT_MULT;
6333 NEXT;
6334 } else if (RAW == '+') {
6335 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6336 NEXT;
6337 } else {
6338 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6339 }
6340 }
6341 SKIP_BLANKS;
6342 GROW;
6343 }
6344 if ((cur != NULL) && (last != NULL)) {
6345 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006346 if (last != NULL)
6347 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006348 }
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006349 if (ctxt->input->id != inputchk) {
6350 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6351 "Element content declaration doesn't start and stop in"
6352 " the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006353 }
Owen Taylor3473f882001-02-23 17:55:21 +00006354 NEXT;
6355 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006356 if (ret != NULL) {
6357 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6358 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6359 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6360 else
6361 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6362 }
Owen Taylor3473f882001-02-23 17:55:21 +00006363 NEXT;
6364 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006365 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006366 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006367 cur = ret;
6368 /*
6369 * Some normalization:
6370 * (a | b* | c?)* == (a | b | c)*
6371 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006372 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006373 if ((cur->c1 != NULL) &&
6374 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6375 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6376 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6377 if ((cur->c2 != NULL) &&
6378 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6379 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6380 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6381 cur = cur->c2;
6382 }
6383 }
Owen Taylor3473f882001-02-23 17:55:21 +00006384 NEXT;
6385 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006386 if (ret != NULL) {
6387 int found = 0;
6388
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006389 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6390 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6391 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006392 else
6393 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006394 /*
6395 * Some normalization:
6396 * (a | b*)+ == (a | b)*
6397 * (a | b?)+ == (a | b)*
6398 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006399 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006400 if ((cur->c1 != NULL) &&
6401 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6402 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6403 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6404 found = 1;
6405 }
6406 if ((cur->c2 != NULL) &&
6407 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6408 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6409 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6410 found = 1;
6411 }
6412 cur = cur->c2;
6413 }
6414 if (found)
6415 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6416 }
Owen Taylor3473f882001-02-23 17:55:21 +00006417 NEXT;
6418 }
6419 return(ret);
6420}
6421
6422/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006423 * xmlParseElementChildrenContentDecl:
6424 * @ctxt: an XML parser context
6425 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006426 *
6427 * parse the declaration for a Mixed Element content
6428 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6429 *
6430 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6431 *
6432 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6433 *
6434 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6435 *
6436 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6437 *
6438 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6439 * TODO Parameter-entity replacement text must be properly nested
6440 * with parenthesized groups. That is to say, if either of the
6441 * opening or closing parentheses in a choice, seq, or Mixed
6442 * construct is contained in the replacement text for a parameter
6443 * entity, both must be contained in the same replacement text. For
6444 * interoperability, if a parameter-entity reference appears in a
6445 * choice, seq, or Mixed construct, its replacement text should not
6446 * be empty, and neither the first nor last non-blank character of
6447 * the replacement text should be a connector (| or ,).
6448 *
6449 * Returns the tree of xmlElementContentPtr describing the element
6450 * hierarchy.
6451 */
6452xmlElementContentPtr
6453xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6454 /* stub left for API/ABI compat */
6455 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6456}
6457
6458/**
Owen Taylor3473f882001-02-23 17:55:21 +00006459 * xmlParseElementContentDecl:
6460 * @ctxt: an XML parser context
6461 * @name: the name of the element being defined.
6462 * @result: the Element Content pointer will be stored here if any
6463 *
6464 * parse the declaration for an Element content either Mixed or Children,
6465 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006466 *
Owen Taylor3473f882001-02-23 17:55:21 +00006467 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6468 *
6469 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6470 */
6471
6472int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006473xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006474 xmlElementContentPtr *result) {
6475
6476 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006477 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006478 int res;
6479
6480 *result = NULL;
6481
6482 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006483 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006484 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006485 return(-1);
6486 }
6487 NEXT;
6488 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006489 if (ctxt->instate == XML_PARSER_EOF)
6490 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006491 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006492 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006493 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006494 res = XML_ELEMENT_TYPE_MIXED;
6495 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006496 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006497 res = XML_ELEMENT_TYPE_ELEMENT;
6498 }
Owen Taylor3473f882001-02-23 17:55:21 +00006499 SKIP_BLANKS;
6500 *result = tree;
6501 return(res);
6502}
6503
6504/**
6505 * xmlParseElementDecl:
6506 * @ctxt: an XML parser context
6507 *
6508 * parse an Element declaration.
6509 *
6510 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6511 *
6512 * [ VC: Unique Element Type Declaration ]
6513 * No element type may be declared more than once
6514 *
6515 * Returns the type of the element, or -1 in case of error
6516 */
6517int
6518xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006519 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006520 int ret = -1;
6521 xmlElementContentPtr content = NULL;
6522
Daniel Veillard4c778d82005-01-23 17:37:44 +00006523 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006524 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006525 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006526
6527 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006528 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006529 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6530 "Space required after 'ELEMENT'\n");
David Kilzer00906752016-01-26 16:57:03 -08006531 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006532 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00006533 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006534 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006535 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6536 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006537 return(-1);
6538 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006539 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006540 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6541 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006542 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00006543 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006544 SKIP(5);
6545 /*
6546 * Element must always be empty.
6547 */
6548 ret = XML_ELEMENT_TYPE_EMPTY;
6549 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6550 (NXT(2) == 'Y')) {
6551 SKIP(3);
6552 /*
6553 * Element is a generic container.
6554 */
6555 ret = XML_ELEMENT_TYPE_ANY;
6556 } else if (RAW == '(') {
6557 ret = xmlParseElementContentDecl(ctxt, name, &content);
6558 } else {
6559 /*
6560 * [ WFC: PEs in Internal Subset ] error handling.
6561 */
6562 if ((RAW == '%') && (ctxt->external == 0) &&
6563 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006564 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006565 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006566 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006567 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006568 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6569 }
Owen Taylor3473f882001-02-23 17:55:21 +00006570 return(-1);
6571 }
6572
6573 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006574
6575 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006576 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006577 if (content != NULL) {
6578 xmlFreeDocElementContent(ctxt->myDoc, content);
6579 }
Owen Taylor3473f882001-02-23 17:55:21 +00006580 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006581 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006582 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006583 "Element declaration doesn't start and stop in"
6584 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006585 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006586
Owen Taylor3473f882001-02-23 17:55:21 +00006587 NEXT;
6588 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006589 (ctxt->sax->elementDecl != NULL)) {
6590 if (content != NULL)
6591 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006592 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6593 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006594 if ((content != NULL) && (content->parent == NULL)) {
6595 /*
6596 * this is a trick: if xmlAddElementDecl is called,
6597 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006598 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006599 * interfaces or change the API/ABI
6600 */
6601 xmlFreeDocElementContent(ctxt->myDoc, content);
6602 }
6603 } else if (content != NULL) {
6604 xmlFreeDocElementContent(ctxt->myDoc, content);
6605 }
Owen Taylor3473f882001-02-23 17:55:21 +00006606 }
Owen Taylor3473f882001-02-23 17:55:21 +00006607 }
6608 return(ret);
6609}
6610
6611/**
Owen Taylor3473f882001-02-23 17:55:21 +00006612 * xmlParseConditionalSections
6613 * @ctxt: an XML parser context
6614 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006615 * [61] conditionalSect ::= includeSect | ignoreSect
6616 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006617 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6618 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6619 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6620 */
6621
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006622static void
Owen Taylor3473f882001-02-23 17:55:21 +00006623xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006624 int id = ctxt->input->id;
6625
Owen Taylor3473f882001-02-23 17:55:21 +00006626 SKIP(3);
6627 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006628 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006629 SKIP(7);
6630 SKIP_BLANKS;
6631 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006632 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006633 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006634 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006635 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006636 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006637 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6638 "All markup of the conditional section is not"
6639 " in the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006640 }
Owen Taylor3473f882001-02-23 17:55:21 +00006641 NEXT;
6642 }
6643 if (xmlParserDebugEntities) {
6644 if ((ctxt->input != NULL) && (ctxt->input->filename))
6645 xmlGenericError(xmlGenericErrorContext,
6646 "%s(%d): ", ctxt->input->filename,
6647 ctxt->input->line);
6648 xmlGenericError(xmlGenericErrorContext,
6649 "Entering INCLUDE Conditional Section\n");
6650 }
6651
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006652 SKIP_BLANKS;
6653 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006654 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6655 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006656 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006657 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006658
6659 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6660 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006661 } else
6662 xmlParseMarkupDecl(ctxt);
6663
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006664 SKIP_BLANKS;
6665 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006666
Daniel Veillardfdc91562002-07-01 21:52:03 +00006667 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006668 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
David Kilzer00906752016-01-26 16:57:03 -08006669 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006670 break;
6671 }
6672 }
6673 if (xmlParserDebugEntities) {
6674 if ((ctxt->input != NULL) && (ctxt->input->filename))
6675 xmlGenericError(xmlGenericErrorContext,
6676 "%s(%d): ", ctxt->input->filename,
6677 ctxt->input->line);
6678 xmlGenericError(xmlGenericErrorContext,
6679 "Leaving INCLUDE Conditional Section\n");
6680 }
6681
Daniel Veillarda07050d2003-10-19 14:46:32 +00006682 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006683 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006684 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006685 int depth = 0;
6686
6687 SKIP(6);
6688 SKIP_BLANKS;
6689 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006690 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006691 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006692 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006693 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006694 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006695 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6696 "All markup of the conditional section is not"
6697 " in the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006698 }
Owen Taylor3473f882001-02-23 17:55:21 +00006699 NEXT;
6700 }
6701 if (xmlParserDebugEntities) {
6702 if ((ctxt->input != NULL) && (ctxt->input->filename))
6703 xmlGenericError(xmlGenericErrorContext,
6704 "%s(%d): ", ctxt->input->filename,
6705 ctxt->input->line);
6706 xmlGenericError(xmlGenericErrorContext,
6707 "Entering IGNORE Conditional Section\n");
6708 }
6709
6710 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006711 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006712 * But disable SAX event generating DTD building in the meantime
6713 */
6714 state = ctxt->disableSAX;
6715 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006716 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006717 ctxt->instate = XML_PARSER_IGNORE;
6718
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006719 while (((depth >= 0) && (RAW != 0)) &&
6720 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006721 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6722 depth++;
6723 SKIP(3);
6724 continue;
6725 }
6726 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6727 if (--depth >= 0) SKIP(3);
6728 continue;
6729 }
6730 NEXT;
6731 continue;
6732 }
6733
6734 ctxt->disableSAX = state;
6735 ctxt->instate = instate;
6736
6737 if (xmlParserDebugEntities) {
6738 if ((ctxt->input != NULL) && (ctxt->input->filename))
6739 xmlGenericError(xmlGenericErrorContext,
6740 "%s(%d): ", ctxt->input->filename,
6741 ctxt->input->line);
6742 xmlGenericError(xmlGenericErrorContext,
6743 "Leaving IGNORE Conditional Section\n");
6744 }
6745
6746 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006747 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006748 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006749 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006750 }
6751
6752 if (RAW == 0)
6753 SHRINK;
6754
6755 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006756 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006757 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006758 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006759 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6760 "All markup of the conditional section is not in"
6761 " the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006762 }
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006763 if ((ctxt-> instate != XML_PARSER_EOF) &&
Daniel Veillard41ac9042015-10-27 10:53:44 +08006764 ((ctxt->input->cur + 3) <= ctxt->input->end))
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006765 SKIP(3);
Owen Taylor3473f882001-02-23 17:55:21 +00006766 }
6767}
6768
6769/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006770 * xmlParseMarkupDecl:
6771 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006772 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006773 * parse Markup declarations
6774 *
6775 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6776 * NotationDecl | PI | Comment
6777 *
6778 * [ VC: Proper Declaration/PE Nesting ]
6779 * Parameter-entity replacement text must be properly nested with
6780 * markup declarations. That is to say, if either the first character
6781 * or the last character of a markup declaration (markupdecl above) is
6782 * contained in the replacement text for a parameter-entity reference,
6783 * both must be contained in the same replacement text.
6784 *
6785 * [ WFC: PEs in Internal Subset ]
6786 * In the internal DTD subset, parameter-entity references can occur
6787 * only where markup declarations can occur, not within markup declarations.
6788 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006789 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006790 */
6791void
6792xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6793 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006794 if (CUR == '<') {
6795 if (NXT(1) == '!') {
6796 switch (NXT(2)) {
6797 case 'E':
6798 if (NXT(3) == 'L')
6799 xmlParseElementDecl(ctxt);
6800 else if (NXT(3) == 'N')
6801 xmlParseEntityDecl(ctxt);
6802 break;
6803 case 'A':
6804 xmlParseAttributeListDecl(ctxt);
6805 break;
6806 case 'N':
6807 xmlParseNotationDecl(ctxt);
6808 break;
6809 case '-':
6810 xmlParseComment(ctxt);
6811 break;
6812 default:
6813 /* there is an error but it will be detected later */
6814 break;
6815 }
6816 } else if (NXT(1) == '?') {
6817 xmlParsePI(ctxt);
6818 }
6819 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006820
6821 /*
6822 * detect requirement to exit there and act accordingly
6823 * and avoid having instate overriden later on
6824 */
6825 if (ctxt->instate == XML_PARSER_EOF)
6826 return;
6827
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006828 /*
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006829 * Conditional sections are allowed from entities included
6830 * by PE References in the internal subset.
6831 */
6832 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6833 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6834 xmlParseConditionalSections(ctxt);
6835 }
6836 }
6837
6838 ctxt->instate = XML_PARSER_DTD;
6839}
6840
6841/**
6842 * xmlParseTextDecl:
6843 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006844 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006845 * parse an XML declaration header for external entities
6846 *
6847 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006848 */
6849
6850void
6851xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6852 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006853 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006854
6855 /*
6856 * We know that '<?xml' is here.
6857 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006858 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006859 SKIP(5);
6860 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006861 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006862 return;
6863 }
6864
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006865 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006866 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6867 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006868 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006869
6870 /*
6871 * We may have the VersionInfo here.
6872 */
6873 version = xmlParseVersionInfo(ctxt);
6874 if (version == NULL)
6875 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006876 else {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006877 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006878 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6879 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006880 }
6881 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006882 ctxt->input->version = version;
6883
6884 /*
6885 * We must have the encoding declaration
6886 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006887 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006888 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6889 /*
6890 * The XML REC instructs us to stop parsing right here
6891 */
6892 return;
6893 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006894 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6895 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6896 "Missing encoding in text declaration\n");
6897 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006898
6899 SKIP_BLANKS;
6900 if ((RAW == '?') && (NXT(1) == '>')) {
6901 SKIP(2);
6902 } else if (RAW == '>') {
6903 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006904 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006905 NEXT;
6906 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006907 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006908 MOVETO_ENDTAG(CUR_PTR);
6909 NEXT;
6910 }
6911}
6912
6913/**
Owen Taylor3473f882001-02-23 17:55:21 +00006914 * xmlParseExternalSubset:
6915 * @ctxt: an XML parser context
6916 * @ExternalID: the external identifier
6917 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006918 *
Owen Taylor3473f882001-02-23 17:55:21 +00006919 * parse Markup declarations from an external subset
6920 *
6921 * [30] extSubset ::= textDecl? extSubsetDecl
6922 *
6923 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6924 */
6925void
6926xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6927 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006928 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006929 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006930
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006931 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006932 (ctxt->input->end - ctxt->input->cur >= 4)) {
6933 xmlChar start[4];
6934 xmlCharEncoding enc;
6935
6936 start[0] = RAW;
6937 start[1] = NXT(1);
6938 start[2] = NXT(2);
6939 start[3] = NXT(3);
6940 enc = xmlDetectCharEncoding(start, 4);
6941 if (enc != XML_CHAR_ENCODING_NONE)
6942 xmlSwitchEncoding(ctxt, enc);
6943 }
6944
Daniel Veillarda07050d2003-10-19 14:46:32 +00006945 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006946 xmlParseTextDecl(ctxt);
6947 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6948 /*
6949 * The XML REC instructs us to stop parsing right here
6950 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08006951 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006952 return;
6953 }
6954 }
6955 if (ctxt->myDoc == NULL) {
6956 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006957 if (ctxt->myDoc == NULL) {
6958 xmlErrMemory(ctxt, "New Doc failed");
6959 return;
6960 }
6961 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006962 }
6963 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6964 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6965
6966 ctxt->instate = XML_PARSER_DTD;
6967 ctxt->external = 1;
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006968 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006969 while (((RAW == '<') && (NXT(1) == '?')) ||
6970 ((RAW == '<') && (NXT(1) == '!')) ||
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006971 (RAW == '%')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006972 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006973 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006974
6975 GROW;
6976 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6977 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006978 } else
6979 xmlParseMarkupDecl(ctxt);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006980 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006981
Daniel Veillardfdc91562002-07-01 21:52:03 +00006982 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006983 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006984 break;
6985 }
6986 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006987
Owen Taylor3473f882001-02-23 17:55:21 +00006988 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006989 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006990 }
6991
6992}
6993
6994/**
6995 * xmlParseReference:
6996 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006997 *
Owen Taylor3473f882001-02-23 17:55:21 +00006998 * parse and handle entity references in content, depending on the SAX
6999 * interface, this may end-up in a call to character() if this is a
7000 * CharRef, a predefined entity, if there is no reference() callback.
7001 * or if the parser was asked to switch to that mode.
7002 *
7003 * [67] Reference ::= EntityRef | CharRef
7004 */
7005void
7006xmlParseReference(xmlParserCtxtPtr ctxt) {
7007 xmlEntityPtr ent;
7008 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007009 int was_checked;
7010 xmlNodePtr list = NULL;
7011 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007012
Daniel Veillard0161e632008-08-28 15:36:32 +00007013
7014 if (RAW != '&')
7015 return;
7016
7017 /*
7018 * Simple case of a CharRef
7019 */
Owen Taylor3473f882001-02-23 17:55:21 +00007020 if (NXT(1) == '#') {
7021 int i = 0;
7022 xmlChar out[10];
7023 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007024 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007025
Daniel Veillarddc171602008-03-26 17:41:38 +00007026 if (value == 0)
7027 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007028 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7029 /*
7030 * So we are using non-UTF-8 buffers
7031 * Check that the char fit on 8bits, if not
7032 * generate a CharRef.
7033 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007034 if (value <= 0xFF) {
7035 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007036 out[1] = 0;
7037 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7038 (!ctxt->disableSAX))
7039 ctxt->sax->characters(ctxt->userData, out, 1);
7040 } else {
7041 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007042 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007043 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007044 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007045 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7046 (!ctxt->disableSAX))
7047 ctxt->sax->reference(ctxt->userData, out);
7048 }
7049 } else {
7050 /*
7051 * Just encode the value in UTF-8
7052 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007053 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007054 out[i] = 0;
7055 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7056 (!ctxt->disableSAX))
7057 ctxt->sax->characters(ctxt->userData, out, i);
7058 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007059 return;
7060 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007061
Daniel Veillard0161e632008-08-28 15:36:32 +00007062 /*
7063 * We are seeing an entity reference
7064 */
7065 ent = xmlParseEntityRef(ctxt);
7066 if (ent == NULL) return;
7067 if (!ctxt->wellFormed)
7068 return;
7069 was_checked = ent->checked;
7070
7071 /* special case of predefined entities */
7072 if ((ent->name == NULL) ||
7073 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7074 val = ent->content;
7075 if (val == NULL) return;
7076 /*
7077 * inline the entity.
7078 */
7079 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7080 (!ctxt->disableSAX))
7081 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7082 return;
7083 }
7084
7085 /*
7086 * The first reference to the entity trigger a parsing phase
7087 * where the ent->children is filled with the result from
7088 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007089 * Note: external parsed entities will not be loaded, it is not
7090 * required for a non-validating parser, unless the parsing option
7091 * of validating, or substituting entities were given. Doing so is
7092 * far more secure as the parser will only process data coming from
7093 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007094 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007095 if (((ent->checked == 0) ||
7096 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007097 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7098 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007099 unsigned long oldnbent = ctxt->nbentities;
7100
7101 /*
7102 * This is a bit hackish but this seems the best
7103 * way to make sure both SAX and DOM entity support
7104 * behaves okay.
7105 */
7106 void *user_data;
7107 if (ctxt->userData == ctxt)
7108 user_data = NULL;
7109 else
7110 user_data = ctxt->userData;
7111
7112 /*
7113 * Check that this entity is well formed
7114 * 4.3.2: An internal general parsed entity is well-formed
7115 * if its replacement text matches the production labeled
7116 * content.
7117 */
7118 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7119 ctxt->depth++;
7120 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7121 user_data, &list);
7122 ctxt->depth--;
7123
7124 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7125 ctxt->depth++;
7126 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7127 user_data, ctxt->depth, ent->URI,
7128 ent->ExternalID, &list);
7129 ctxt->depth--;
7130 } else {
7131 ret = XML_ERR_ENTITY_PE_INTERNAL;
7132 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7133 "invalid entity type found\n", NULL);
7134 }
7135
7136 /*
7137 * Store the number of entities needing parsing for this entity
7138 * content and do checkings
7139 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007140 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7141 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7142 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007143 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007144 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007145 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007146 return;
7147 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007148 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007149 xmlFreeNodeList(list);
7150 return;
7151 }
Owen Taylor3473f882001-02-23 17:55:21 +00007152
Daniel Veillard0161e632008-08-28 15:36:32 +00007153 if ((ret == XML_ERR_OK) && (list != NULL)) {
7154 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7155 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7156 (ent->children == NULL)) {
7157 ent->children = list;
7158 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007159 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007160 * Prune it directly in the generated document
7161 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007162 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007163 if (((list->type == XML_TEXT_NODE) &&
7164 (list->next == NULL)) ||
7165 (ctxt->parseMode == XML_PARSE_READER)) {
7166 list->parent = (xmlNodePtr) ent;
7167 list = NULL;
7168 ent->owner = 1;
7169 } else {
7170 ent->owner = 0;
7171 while (list != NULL) {
7172 list->parent = (xmlNodePtr) ctxt->node;
7173 list->doc = ctxt->myDoc;
7174 if (list->next == NULL)
7175 ent->last = list;
7176 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007177 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007178 list = ent->children;
7179#ifdef LIBXML_LEGACY_ENABLED
7180 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7181 xmlAddEntityReference(ent, list, NULL);
7182#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007183 }
7184 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007185 ent->owner = 1;
7186 while (list != NULL) {
7187 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007188 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007189 if (list->next == NULL)
7190 ent->last = list;
7191 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007192 }
7193 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007194 } else {
7195 xmlFreeNodeList(list);
7196 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007197 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007198 } else if ((ret != XML_ERR_OK) &&
7199 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7200 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7201 "Entity '%s' failed to parse\n", ent->name);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007202 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007203 } else if (list != NULL) {
7204 xmlFreeNodeList(list);
7205 list = NULL;
7206 }
7207 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007208 ent->checked = 2;
David Kilzer3f0627a2017-06-16 21:30:42 +02007209
7210 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7211 was_checked = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007212 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007213 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007214 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007215
Daniel Veillard0161e632008-08-28 15:36:32 +00007216 /*
7217 * Now that the entity content has been gathered
7218 * provide it to the application, this can take different forms based
7219 * on the parsing modes.
7220 */
7221 if (ent->children == NULL) {
7222 /*
7223 * Probably running in SAX mode and the callbacks don't
7224 * build the entity content. So unless we already went
7225 * though parsing for first checking go though the entity
7226 * content to generate callbacks associated to the entity
7227 */
7228 if (was_checked != 0) {
7229 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007230 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007231 * This is a bit hackish but this seems the best
7232 * way to make sure both SAX and DOM entity support
7233 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007234 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007235 if (ctxt->userData == ctxt)
7236 user_data = NULL;
7237 else
7238 user_data = ctxt->userData;
7239
7240 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7241 ctxt->depth++;
7242 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7243 ent->content, user_data, NULL);
7244 ctxt->depth--;
7245 } else if (ent->etype ==
7246 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7247 ctxt->depth++;
7248 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7249 ctxt->sax, user_data, ctxt->depth,
7250 ent->URI, ent->ExternalID, NULL);
7251 ctxt->depth--;
7252 } else {
7253 ret = XML_ERR_ENTITY_PE_INTERNAL;
7254 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7255 "invalid entity type found\n", NULL);
7256 }
7257 if (ret == XML_ERR_ENTITY_LOOP) {
7258 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7259 return;
7260 }
7261 }
7262 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7263 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7264 /*
7265 * Entity reference callback comes second, it's somewhat
7266 * superfluous but a compatibility to historical behaviour
7267 */
7268 ctxt->sax->reference(ctxt->userData, ent->name);
7269 }
7270 return;
7271 }
7272
7273 /*
7274 * If we didn't get any children for the entity being built
7275 */
7276 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7277 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7278 /*
7279 * Create a node.
7280 */
7281 ctxt->sax->reference(ctxt->userData, ent->name);
7282 return;
7283 }
7284
7285 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7286 /*
7287 * There is a problem on the handling of _private for entities
7288 * (bug 155816): Should we copy the content of the field from
7289 * the entity (possibly overwriting some value set by the user
7290 * when a copy is created), should we leave it alone, or should
7291 * we try to take care of different situations? The problem
7292 * is exacerbated by the usage of this field by the xmlReader.
7293 * To fix this bug, we look at _private on the created node
7294 * and, if it's NULL, we copy in whatever was in the entity.
7295 * If it's not NULL we leave it alone. This is somewhat of a
7296 * hack - maybe we should have further tests to determine
7297 * what to do.
7298 */
7299 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7300 /*
7301 * Seems we are generating the DOM content, do
7302 * a simple tree copy for all references except the first
7303 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007304 */
7305 if (((list == NULL) && (ent->owner == 0)) ||
7306 (ctxt->parseMode == XML_PARSE_READER)) {
7307 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7308
7309 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007310 * We are copying here, make sure there is no abuse
7311 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007312 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007313 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7314 return;
7315
7316 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007317 * when operating on a reader, the entities definitions
7318 * are always owning the entities subtree.
7319 if (ctxt->parseMode == XML_PARSE_READER)
7320 ent->owner = 1;
7321 */
7322
7323 cur = ent->children;
7324 while (cur != NULL) {
7325 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7326 if (nw != NULL) {
7327 if (nw->_private == NULL)
7328 nw->_private = cur->_private;
7329 if (firstChild == NULL){
7330 firstChild = nw;
7331 }
7332 nw = xmlAddChild(ctxt->node, nw);
7333 }
7334 if (cur == ent->last) {
7335 /*
7336 * needed to detect some strange empty
7337 * node cases in the reader tests
7338 */
7339 if ((ctxt->parseMode == XML_PARSE_READER) &&
7340 (nw != NULL) &&
7341 (nw->type == XML_ELEMENT_NODE) &&
7342 (nw->children == NULL))
7343 nw->extra = 1;
7344
7345 break;
7346 }
7347 cur = cur->next;
7348 }
7349#ifdef LIBXML_LEGACY_ENABLED
7350 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7351 xmlAddEntityReference(ent, firstChild, nw);
7352#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007353 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007354 xmlNodePtr nw = NULL, cur, next, last,
7355 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007356
7357 /*
7358 * We are copying here, make sure there is no abuse
7359 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007360 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007361 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7362 return;
7363
Daniel Veillard0161e632008-08-28 15:36:32 +00007364 /*
7365 * Copy the entity child list and make it the new
7366 * entity child list. The goal is to make sure any
7367 * ID or REF referenced will be the one from the
7368 * document content and not the entity copy.
7369 */
7370 cur = ent->children;
7371 ent->children = NULL;
7372 last = ent->last;
7373 ent->last = NULL;
7374 while (cur != NULL) {
7375 next = cur->next;
7376 cur->next = NULL;
7377 cur->parent = NULL;
7378 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7379 if (nw != NULL) {
7380 if (nw->_private == NULL)
7381 nw->_private = cur->_private;
7382 if (firstChild == NULL){
7383 firstChild = cur;
7384 }
7385 xmlAddChild((xmlNodePtr) ent, nw);
7386 xmlAddChild(ctxt->node, cur);
7387 }
7388 if (cur == last)
7389 break;
7390 cur = next;
7391 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007392 if (ent->owner == 0)
7393 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007394#ifdef LIBXML_LEGACY_ENABLED
7395 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7396 xmlAddEntityReference(ent, firstChild, nw);
7397#endif /* LIBXML_LEGACY_ENABLED */
7398 } else {
7399 const xmlChar *nbktext;
7400
7401 /*
7402 * the name change is to avoid coalescing of the
7403 * node with a possible previous text one which
7404 * would make ent->children a dangling pointer
7405 */
7406 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7407 -1);
7408 if (ent->children->type == XML_TEXT_NODE)
7409 ent->children->name = nbktext;
7410 if ((ent->last != ent->children) &&
7411 (ent->last->type == XML_TEXT_NODE))
7412 ent->last->name = nbktext;
7413 xmlAddChildList(ctxt->node, ent->children);
7414 }
7415
7416 /*
7417 * This is to avoid a nasty side effect, see
7418 * characters() in SAX.c
7419 */
7420 ctxt->nodemem = 0;
7421 ctxt->nodelen = 0;
7422 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007423 }
7424 }
7425}
7426
7427/**
7428 * xmlParseEntityRef:
7429 * @ctxt: an XML parser context
7430 *
7431 * parse ENTITY references declarations
7432 *
7433 * [68] EntityRef ::= '&' Name ';'
7434 *
7435 * [ WFC: Entity Declared ]
7436 * In a document without any DTD, a document with only an internal DTD
7437 * subset which contains no parameter entity references, or a document
7438 * with "standalone='yes'", the Name given in the entity reference
7439 * must match that in an entity declaration, except that well-formed
7440 * documents need not declare any of the following entities: amp, lt,
7441 * gt, apos, quot. The declaration of a parameter entity must precede
7442 * any reference to it. Similarly, the declaration of a general entity
7443 * must precede any reference to it which appears in a default value in an
7444 * attribute-list declaration. Note that if entities are declared in the
7445 * external subset or in external parameter entities, a non-validating
7446 * processor is not obligated to read and process their declarations;
7447 * for such documents, the rule that an entity must be declared is a
7448 * well-formedness constraint only if standalone='yes'.
7449 *
7450 * [ WFC: Parsed Entity ]
7451 * An entity reference must not contain the name of an unparsed entity
7452 *
7453 * Returns the xmlEntityPtr if found, or NULL otherwise.
7454 */
7455xmlEntityPtr
7456xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007457 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007458 xmlEntityPtr ent = NULL;
7459
7460 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007461 if (ctxt->instate == XML_PARSER_EOF)
7462 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007463
Daniel Veillard0161e632008-08-28 15:36:32 +00007464 if (RAW != '&')
7465 return(NULL);
7466 NEXT;
7467 name = xmlParseName(ctxt);
7468 if (name == NULL) {
7469 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7470 "xmlParseEntityRef: no name\n");
7471 return(NULL);
7472 }
7473 if (RAW != ';') {
7474 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7475 return(NULL);
7476 }
7477 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007478
Daniel Veillard0161e632008-08-28 15:36:32 +00007479 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007480 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007481 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007482 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7483 ent = xmlGetPredefinedEntity(name);
7484 if (ent != NULL)
7485 return(ent);
7486 }
Owen Taylor3473f882001-02-23 17:55:21 +00007487
Daniel Veillard0161e632008-08-28 15:36:32 +00007488 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007489 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007490 */
7491 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007492
Daniel Veillard0161e632008-08-28 15:36:32 +00007493 /*
7494 * Ask first SAX for entity resolution, otherwise try the
7495 * entities which may have stored in the parser context.
7496 */
7497 if (ctxt->sax != NULL) {
7498 if (ctxt->sax->getEntity != NULL)
7499 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007500 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007501 (ctxt->options & XML_PARSE_OLDSAX))
7502 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007503 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7504 (ctxt->userData==ctxt)) {
7505 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007506 }
7507 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007508 if (ctxt->instate == XML_PARSER_EOF)
7509 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007510 /*
7511 * [ WFC: Entity Declared ]
7512 * In a document without any DTD, a document with only an
7513 * internal DTD subset which contains no parameter entity
7514 * references, or a document with "standalone='yes'", the
7515 * Name given in the entity reference must match that in an
7516 * entity declaration, except that well-formed documents
7517 * need not declare any of the following entities: amp, lt,
7518 * gt, apos, quot.
7519 * The declaration of a parameter entity must precede any
7520 * reference to it.
7521 * Similarly, the declaration of a general entity must
7522 * precede any reference to it which appears in a default
7523 * value in an attribute-list declaration. Note that if
7524 * entities are declared in the external subset or in
7525 * external parameter entities, a non-validating processor
7526 * is not obligated to read and process their declarations;
7527 * for such documents, the rule that an entity must be
7528 * declared is a well-formedness constraint only if
7529 * standalone='yes'.
7530 */
7531 if (ent == NULL) {
7532 if ((ctxt->standalone == 1) ||
7533 ((ctxt->hasExternalSubset == 0) &&
7534 (ctxt->hasPErefs == 0))) {
7535 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7536 "Entity '%s' not defined\n", name);
7537 } else {
7538 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7539 "Entity '%s' not defined\n", name);
7540 if ((ctxt->inSubset == 0) &&
7541 (ctxt->sax != NULL) &&
7542 (ctxt->sax->reference != NULL)) {
7543 ctxt->sax->reference(ctxt->userData, name);
7544 }
7545 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007546 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007547 ctxt->valid = 0;
7548 }
7549
7550 /*
7551 * [ WFC: Parsed Entity ]
7552 * An entity reference must not contain the name of an
7553 * unparsed entity
7554 */
7555 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7556 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7557 "Entity reference to unparsed entity %s\n", name);
7558 }
7559
7560 /*
7561 * [ WFC: No External Entity References ]
7562 * Attribute values cannot contain direct or indirect
7563 * entity references to external entities.
7564 */
7565 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7566 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7567 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7568 "Attribute references external entity '%s'\n", name);
7569 }
7570 /*
7571 * [ WFC: No < in Attribute Values ]
7572 * The replacement text of any entity referred to directly or
7573 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007574 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007575 */
7576 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007577 (ent != NULL) &&
7578 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007579 if (((ent->checked & 1) || (ent->checked == 0)) &&
7580 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007581 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7582 "'<' in entity '%s' is not allowed in attributes values\n", name);
7583 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007584 }
7585
7586 /*
7587 * Internal check, no parameter entities here ...
7588 */
7589 else {
7590 switch (ent->etype) {
7591 case XML_INTERNAL_PARAMETER_ENTITY:
7592 case XML_EXTERNAL_PARAMETER_ENTITY:
7593 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7594 "Attempt to reference the parameter entity '%s'\n",
7595 name);
7596 break;
7597 default:
7598 break;
7599 }
7600 }
7601
7602 /*
7603 * [ WFC: No Recursion ]
7604 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007605 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007606 * Done somewhere else
7607 */
Owen Taylor3473f882001-02-23 17:55:21 +00007608 return(ent);
7609}
7610
7611/**
7612 * xmlParseStringEntityRef:
7613 * @ctxt: an XML parser context
7614 * @str: a pointer to an index in the string
7615 *
7616 * parse ENTITY references declarations, but this version parses it from
7617 * a string value.
7618 *
7619 * [68] EntityRef ::= '&' Name ';'
7620 *
7621 * [ WFC: Entity Declared ]
7622 * In a document without any DTD, a document with only an internal DTD
7623 * subset which contains no parameter entity references, or a document
7624 * with "standalone='yes'", the Name given in the entity reference
7625 * must match that in an entity declaration, except that well-formed
7626 * documents need not declare any of the following entities: amp, lt,
7627 * gt, apos, quot. The declaration of a parameter entity must precede
7628 * any reference to it. Similarly, the declaration of a general entity
7629 * must precede any reference to it which appears in a default value in an
7630 * attribute-list declaration. Note that if entities are declared in the
7631 * external subset or in external parameter entities, a non-validating
7632 * processor is not obligated to read and process their declarations;
7633 * for such documents, the rule that an entity must be declared is a
7634 * well-formedness constraint only if standalone='yes'.
7635 *
7636 * [ WFC: Parsed Entity ]
7637 * An entity reference must not contain the name of an unparsed entity
7638 *
7639 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7640 * is updated to the current location in the string.
7641 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007642static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007643xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7644 xmlChar *name;
7645 const xmlChar *ptr;
7646 xmlChar cur;
7647 xmlEntityPtr ent = NULL;
7648
7649 if ((str == NULL) || (*str == NULL))
7650 return(NULL);
7651 ptr = *str;
7652 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007653 if (cur != '&')
7654 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007655
Daniel Veillard0161e632008-08-28 15:36:32 +00007656 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007657 name = xmlParseStringName(ctxt, &ptr);
7658 if (name == NULL) {
7659 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7660 "xmlParseStringEntityRef: no name\n");
7661 *str = ptr;
7662 return(NULL);
7663 }
7664 if (*ptr != ';') {
7665 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007666 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007667 *str = ptr;
7668 return(NULL);
7669 }
7670 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007671
Owen Taylor3473f882001-02-23 17:55:21 +00007672
Daniel Veillard0161e632008-08-28 15:36:32 +00007673 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007674 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007675 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007676 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7677 ent = xmlGetPredefinedEntity(name);
7678 if (ent != NULL) {
7679 xmlFree(name);
7680 *str = ptr;
7681 return(ent);
7682 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007683 }
Owen Taylor3473f882001-02-23 17:55:21 +00007684
Daniel Veillard0161e632008-08-28 15:36:32 +00007685 /*
7686 * Increate the number of entity references parsed
7687 */
7688 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007689
Daniel Veillard0161e632008-08-28 15:36:32 +00007690 /*
7691 * Ask first SAX for entity resolution, otherwise try the
7692 * entities which may have stored in the parser context.
7693 */
7694 if (ctxt->sax != NULL) {
7695 if (ctxt->sax->getEntity != NULL)
7696 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007697 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7698 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007699 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7700 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007701 }
7702 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007703 if (ctxt->instate == XML_PARSER_EOF) {
7704 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007705 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007706 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007707
7708 /*
7709 * [ WFC: Entity Declared ]
7710 * In a document without any DTD, a document with only an
7711 * internal DTD subset which contains no parameter entity
7712 * references, or a document with "standalone='yes'", the
7713 * Name given in the entity reference must match that in an
7714 * entity declaration, except that well-formed documents
7715 * need not declare any of the following entities: amp, lt,
7716 * gt, apos, quot.
7717 * The declaration of a parameter entity must precede any
7718 * reference to it.
7719 * Similarly, the declaration of a general entity must
7720 * precede any reference to it which appears in a default
7721 * value in an attribute-list declaration. Note that if
7722 * entities are declared in the external subset or in
7723 * external parameter entities, a non-validating processor
7724 * is not obligated to read and process their declarations;
7725 * for such documents, the rule that an entity must be
7726 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007727 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007728 */
7729 if (ent == NULL) {
7730 if ((ctxt->standalone == 1) ||
7731 ((ctxt->hasExternalSubset == 0) &&
7732 (ctxt->hasPErefs == 0))) {
7733 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7734 "Entity '%s' not defined\n", name);
7735 } else {
7736 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7737 "Entity '%s' not defined\n",
7738 name);
7739 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007740 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007741 /* TODO ? check regressions ctxt->valid = 0; */
7742 }
7743
7744 /*
7745 * [ WFC: Parsed Entity ]
7746 * An entity reference must not contain the name of an
7747 * unparsed entity
7748 */
7749 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7750 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7751 "Entity reference to unparsed entity %s\n", name);
7752 }
7753
7754 /*
7755 * [ WFC: No External Entity References ]
7756 * Attribute values cannot contain direct or indirect
7757 * entity references to external entities.
7758 */
7759 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7760 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7761 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7762 "Attribute references external entity '%s'\n", name);
7763 }
7764 /*
7765 * [ WFC: No < in Attribute Values ]
7766 * The replacement text of any entity referred to directly or
7767 * indirectly in an attribute value (other than "&lt;") must
7768 * not contain a <.
7769 */
7770 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7771 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007772 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007773 (xmlStrchr(ent->content, '<'))) {
7774 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7775 "'<' in entity '%s' is not allowed in attributes values\n",
7776 name);
7777 }
7778
7779 /*
7780 * Internal check, no parameter entities here ...
7781 */
7782 else {
7783 switch (ent->etype) {
7784 case XML_INTERNAL_PARAMETER_ENTITY:
7785 case XML_EXTERNAL_PARAMETER_ENTITY:
7786 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7787 "Attempt to reference the parameter entity '%s'\n",
7788 name);
7789 break;
7790 default:
7791 break;
7792 }
7793 }
7794
7795 /*
7796 * [ WFC: No Recursion ]
7797 * A parsed entity must not contain a recursive reference
7798 * to itself, either directly or indirectly.
7799 * Done somewhere else
7800 */
7801
7802 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007803 *str = ptr;
7804 return(ent);
7805}
7806
7807/**
7808 * xmlParsePEReference:
7809 * @ctxt: an XML parser context
7810 *
7811 * parse PEReference declarations
7812 * The entity content is handled directly by pushing it's content as
7813 * a new input stream.
7814 *
7815 * [69] PEReference ::= '%' Name ';'
7816 *
7817 * [ WFC: No Recursion ]
7818 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007819 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007820 *
7821 * [ WFC: Entity Declared ]
7822 * In a document without any DTD, a document with only an internal DTD
7823 * subset which contains no parameter entity references, or a document
7824 * with "standalone='yes'", ... ... The declaration of a parameter
7825 * entity must precede any reference to it...
7826 *
7827 * [ VC: Entity Declared ]
7828 * In a document with an external subset or external parameter entities
7829 * with "standalone='no'", ... ... The declaration of a parameter entity
7830 * must precede any reference to it...
7831 *
7832 * [ WFC: In DTD ]
7833 * Parameter-entity references may only appear in the DTD.
7834 * NOTE: misleading but this is handled.
7835 */
7836void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007837xmlParsePEReference(xmlParserCtxtPtr ctxt)
7838{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007839 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007840 xmlEntityPtr entity = NULL;
7841 xmlParserInputPtr input;
7842
Daniel Veillard0161e632008-08-28 15:36:32 +00007843 if (RAW != '%')
7844 return;
7845 NEXT;
7846 name = xmlParseName(ctxt);
7847 if (name == NULL) {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007848 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
Daniel Veillard0161e632008-08-28 15:36:32 +00007849 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007850 }
Nick Wellnhofer03904152017-06-05 21:16:00 +02007851 if (xmlParserDebugEntities)
7852 xmlGenericError(xmlGenericErrorContext,
7853 "PEReference: %s\n", name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007854 if (RAW != ';') {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007855 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007856 return;
7857 }
7858
7859 NEXT;
7860
7861 /*
7862 * Increate the number of entity references parsed
7863 */
7864 ctxt->nbentities++;
7865
7866 /*
7867 * Request the entity from SAX
7868 */
7869 if ((ctxt->sax != NULL) &&
7870 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007871 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7872 if (ctxt->instate == XML_PARSER_EOF)
7873 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007874 if (entity == NULL) {
7875 /*
7876 * [ WFC: Entity Declared ]
7877 * In a document without any DTD, a document with only an
7878 * internal DTD subset which contains no parameter entity
7879 * references, or a document with "standalone='yes'", ...
7880 * ... The declaration of a parameter entity must precede
7881 * any reference to it...
7882 */
7883 if ((ctxt->standalone == 1) ||
7884 ((ctxt->hasExternalSubset == 0) &&
7885 (ctxt->hasPErefs == 0))) {
7886 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7887 "PEReference: %%%s; not found\n",
7888 name);
7889 } else {
7890 /*
7891 * [ VC: Entity Declared ]
7892 * In a document with an external subset or external
7893 * parameter entities with "standalone='no'", ...
7894 * ... The declaration of a parameter entity must
7895 * precede any reference to it...
7896 */
Nick Wellnhofer03904152017-06-05 21:16:00 +02007897 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7898 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7899 "PEReference: %%%s; not found\n",
7900 name, NULL);
7901 } else
7902 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7903 "PEReference: %%%s; not found\n",
7904 name, NULL);
7905 ctxt->valid = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007906 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007907 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007908 } else {
7909 /*
7910 * Internal checking in case the entity quest barfed
7911 */
7912 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7913 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7914 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7915 "Internal: %%%s; is not a parameter entity\n",
7916 name, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007917 } else {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007918 xmlChar start[4];
7919 xmlCharEncoding enc;
7920
Neel Mehta90ccb582017-04-07 17:43:02 +02007921 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7922 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7923 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7924 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7925 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7926 (ctxt->replaceEntities == 0) &&
7927 (ctxt->validate == 0))
7928 return;
7929
Daniel Veillard0161e632008-08-28 15:36:32 +00007930 input = xmlNewEntityInputStream(ctxt, entity);
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02007931 if (xmlPushInput(ctxt, input) < 0) {
7932 xmlFreeInputStream(input);
Daniel Veillard0161e632008-08-28 15:36:32 +00007933 return;
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02007934 }
Nick Wellnhofer46dc9892017-06-08 02:24:56 +02007935
7936 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7937 /*
7938 * Get the 4 first bytes and decode the charset
7939 * if enc != XML_CHAR_ENCODING_NONE
7940 * plug some encoding conversion routines.
7941 * Note that, since we may have some non-UTF8
7942 * encoding (like UTF16, bug 135229), the 'length'
7943 * is not known, but we can calculate based upon
7944 * the amount of data in the buffer.
7945 */
7946 GROW
7947 if (ctxt->instate == XML_PARSER_EOF)
7948 return;
7949 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7950 start[0] = RAW;
7951 start[1] = NXT(1);
7952 start[2] = NXT(2);
7953 start[3] = NXT(3);
7954 enc = xmlDetectCharEncoding(start, 4);
7955 if (enc != XML_CHAR_ENCODING_NONE) {
7956 xmlSwitchEncoding(ctxt, enc);
7957 }
7958 }
7959
7960 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7961 (IS_BLANK_CH(NXT(5)))) {
7962 xmlParseTextDecl(ctxt);
Nick Wellnhofer03904152017-06-05 21:16:00 +02007963 }
7964 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007965 }
7966 }
7967 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007968}
7969
7970/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007971 * xmlLoadEntityContent:
7972 * @ctxt: an XML parser context
7973 * @entity: an unloaded system entity
7974 *
7975 * Load the original content of the given system entity from the
7976 * ExternalID/SystemID given. This is to be used for Included in Literal
7977 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7978 *
7979 * Returns 0 in case of success and -1 in case of failure
7980 */
7981static int
7982xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7983 xmlParserInputPtr input;
7984 xmlBufferPtr buf;
7985 int l, c;
7986 int count = 0;
7987
7988 if ((ctxt == NULL) || (entity == NULL) ||
7989 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7990 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7991 (entity->content != NULL)) {
7992 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7993 "xmlLoadEntityContent parameter error");
7994 return(-1);
7995 }
7996
7997 if (xmlParserDebugEntities)
7998 xmlGenericError(xmlGenericErrorContext,
7999 "Reading %s entity content input\n", entity->name);
8000
8001 buf = xmlBufferCreate();
8002 if (buf == NULL) {
8003 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8004 "xmlLoadEntityContent parameter error");
8005 return(-1);
8006 }
8007
8008 input = xmlNewEntityInputStream(ctxt, entity);
8009 if (input == NULL) {
8010 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8011 "xmlLoadEntityContent input error");
8012 xmlBufferFree(buf);
8013 return(-1);
8014 }
8015
8016 /*
8017 * Push the entity as the current input, read char by char
8018 * saving to the buffer until the end of the entity or an error
8019 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008020 if (xmlPushInput(ctxt, input) < 0) {
8021 xmlBufferFree(buf);
8022 return(-1);
8023 }
8024
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008025 GROW;
8026 c = CUR_CHAR(l);
8027 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8028 (IS_CHAR(c))) {
8029 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008030 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008031 count = 0;
8032 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008033 if (ctxt->instate == XML_PARSER_EOF) {
8034 xmlBufferFree(buf);
8035 return(-1);
8036 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008037 }
8038 NEXTL(l);
8039 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008040 if (c == 0) {
8041 count = 0;
8042 GROW;
8043 if (ctxt->instate == XML_PARSER_EOF) {
8044 xmlBufferFree(buf);
8045 return(-1);
8046 }
8047 c = CUR_CHAR(l);
8048 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008049 }
8050
8051 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8052 xmlPopInput(ctxt);
8053 } else if (!IS_CHAR(c)) {
8054 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8055 "xmlLoadEntityContent: invalid char value %d\n",
8056 c);
8057 xmlBufferFree(buf);
8058 return(-1);
8059 }
8060 entity->content = buf->content;
8061 buf->content = NULL;
8062 xmlBufferFree(buf);
8063
8064 return(0);
8065}
8066
8067/**
Owen Taylor3473f882001-02-23 17:55:21 +00008068 * xmlParseStringPEReference:
8069 * @ctxt: an XML parser context
8070 * @str: a pointer to an index in the string
8071 *
8072 * parse PEReference declarations
8073 *
8074 * [69] PEReference ::= '%' Name ';'
8075 *
8076 * [ WFC: No Recursion ]
8077 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008078 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008079 *
8080 * [ WFC: Entity Declared ]
8081 * In a document without any DTD, a document with only an internal DTD
8082 * subset which contains no parameter entity references, or a document
8083 * with "standalone='yes'", ... ... The declaration of a parameter
8084 * entity must precede any reference to it...
8085 *
8086 * [ VC: Entity Declared ]
8087 * In a document with an external subset or external parameter entities
8088 * with "standalone='no'", ... ... The declaration of a parameter entity
8089 * must precede any reference to it...
8090 *
8091 * [ WFC: In DTD ]
8092 * Parameter-entity references may only appear in the DTD.
8093 * NOTE: misleading but this is handled.
8094 *
8095 * Returns the xmlEntityPtr of the referenced parameter entity, or NULL otherwise.
8096 * str is updated to the current value of the index
8097 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008098static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008099xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8100 const xmlChar *ptr;
8101 xmlChar cur;
8102 xmlChar *name;
8103 xmlEntityPtr entity = NULL;
8104
8105 if ((str == NULL) || (*str == NULL)) return(NULL);
8106 ptr = *str;
8107 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008108 if (cur != '%')
8109 return(NULL);
8110 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008111 name = xmlParseStringName(ctxt, &ptr);
8112 if (name == NULL) {
8113 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8114 "xmlParseStringPEReference: no name\n");
8115 *str = ptr;
8116 return(NULL);
8117 }
8118 cur = *ptr;
8119 if (cur != ';') {
8120 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8121 xmlFree(name);
8122 *str = ptr;
8123 return(NULL);
8124 }
8125 ptr++;
8126
8127 /*
8128 * Increate the number of entity references parsed
8129 */
8130 ctxt->nbentities++;
8131
8132 /*
8133 * Request the entity from SAX
8134 */
8135 if ((ctxt->sax != NULL) &&
8136 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008137 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8138 if (ctxt->instate == XML_PARSER_EOF) {
8139 xmlFree(name);
Nick Wellnhoferfb2f5182017-06-10 17:06:16 +02008140 *str = ptr;
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008141 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008142 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008143 if (entity == NULL) {
8144 /*
8145 * [ WFC: Entity Declared ]
8146 * In a document without any DTD, a document with only an
8147 * internal DTD subset which contains no parameter entity
8148 * references, or a document with "standalone='yes'", ...
8149 * ... The declaration of a parameter entity must precede
8150 * any reference to it...
8151 */
8152 if ((ctxt->standalone == 1) ||
8153 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8154 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8155 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008156 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008157 /*
8158 * [ VC: Entity Declared ]
8159 * In a document with an external subset or external
8160 * parameter entities with "standalone='no'", ...
8161 * ... The declaration of a parameter entity must
8162 * precede any reference to it...
8163 */
8164 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8165 "PEReference: %%%s; not found\n",
8166 name, NULL);
8167 ctxt->valid = 0;
8168 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008169 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008170 } else {
8171 /*
8172 * Internal checking in case the entity quest barfed
8173 */
8174 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8175 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8176 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8177 "%%%s; is not a parameter entity\n",
8178 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008179 }
8180 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008181 ctxt->hasPErefs = 1;
8182 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008183 *str = ptr;
8184 return(entity);
8185}
8186
8187/**
8188 * xmlParseDocTypeDecl:
8189 * @ctxt: an XML parser context
8190 *
8191 * parse a DOCTYPE declaration
8192 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008193 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008194 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8195 *
8196 * [ VC: Root Element Type ]
8197 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008198 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008199 */
8200
8201void
8202xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008203 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008204 xmlChar *ExternalID = NULL;
8205 xmlChar *URI = NULL;
8206
8207 /*
8208 * We know that '<!DOCTYPE' has been detected.
8209 */
8210 SKIP(9);
8211
8212 SKIP_BLANKS;
8213
8214 /*
8215 * Parse the DOCTYPE name.
8216 */
8217 name = xmlParseName(ctxt);
8218 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008219 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8220 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008221 }
8222 ctxt->intSubName = name;
8223
8224 SKIP_BLANKS;
8225
8226 /*
8227 * Check for SystemID and ExternalID
8228 */
8229 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8230
8231 if ((URI != NULL) || (ExternalID != NULL)) {
8232 ctxt->hasExternalSubset = 1;
8233 }
8234 ctxt->extSubURI = URI;
8235 ctxt->extSubSystem = ExternalID;
8236
8237 SKIP_BLANKS;
8238
8239 /*
8240 * Create and update the internal subset.
8241 */
8242 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8243 (!ctxt->disableSAX))
8244 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008245 if (ctxt->instate == XML_PARSER_EOF)
8246 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008247
8248 /*
8249 * Is there any internal subset declarations ?
8250 * they are handled separately in xmlParseInternalSubset()
8251 */
8252 if (RAW == '[')
8253 return;
8254
8255 /*
8256 * We should be at the end of the DOCTYPE declaration.
8257 */
8258 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008259 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008260 }
8261 NEXT;
8262}
8263
8264/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008265 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008266 * @ctxt: an XML parser context
8267 *
8268 * parse the internal subset declaration
8269 *
8270 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8271 */
8272
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008273static void
Owen Taylor3473f882001-02-23 17:55:21 +00008274xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8275 /*
8276 * Is there any DTD definition ?
8277 */
8278 if (RAW == '[') {
8279 ctxt->instate = XML_PARSER_DTD;
8280 NEXT;
8281 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008282 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008283 * PEReferences.
8284 * Subsequence (markupdecl | PEReference | S)*
8285 */
Nick Wellnhofer453dff12017-06-19 17:55:20 +02008286 while (((RAW != ']') || (ctxt->inputNr > 1)) &&
8287 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008288 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008289 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008290
8291 SKIP_BLANKS;
8292 xmlParseMarkupDecl(ctxt);
8293 xmlParsePEReference(ctxt);
8294
Owen Taylor3473f882001-02-23 17:55:21 +00008295 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008296 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008297 "xmlParseInternalSubset: error detected in Markup declaration\n");
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02008298 if (ctxt->inputNr > 1)
8299 xmlPopInput(ctxt);
8300 else
8301 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008302 }
8303 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008304 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008305 NEXT;
8306 SKIP_BLANKS;
8307 }
8308 }
8309
8310 /*
8311 * We should be at the end of the DOCTYPE declaration.
8312 */
8313 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008314 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Daniel Veillarda7a94612016-02-09 12:55:29 +01008315 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008316 }
8317 NEXT;
8318}
8319
Daniel Veillard81273902003-09-30 00:43:48 +00008320#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008321/**
8322 * xmlParseAttribute:
8323 * @ctxt: an XML parser context
8324 * @value: a xmlChar ** used to store the value of the attribute
8325 *
8326 * parse an attribute
8327 *
8328 * [41] Attribute ::= Name Eq AttValue
8329 *
8330 * [ WFC: No External Entity References ]
8331 * Attribute values cannot contain direct or indirect entity references
8332 * to external entities.
8333 *
8334 * [ WFC: No < in Attribute Values ]
8335 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008336 * an attribute value (other than "&lt;") must not contain a <.
8337 *
Owen Taylor3473f882001-02-23 17:55:21 +00008338 * [ VC: Attribute Value Type ]
8339 * The attribute must have been declared; the value must be of the type
8340 * declared for it.
8341 *
8342 * [25] Eq ::= S? '=' S?
8343 *
8344 * With namespace:
8345 *
8346 * [NS 11] Attribute ::= QName Eq AttValue
8347 *
8348 * Also the case QName == xmlns:??? is handled independently as a namespace
8349 * definition.
8350 *
8351 * Returns the attribute name, and the value in *value.
8352 */
8353
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008354const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008355xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008356 const xmlChar *name;
8357 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008358
8359 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008360 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008361 name = xmlParseName(ctxt);
8362 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008363 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008364 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008365 return(NULL);
8366 }
8367
8368 /*
8369 * read the value
8370 */
8371 SKIP_BLANKS;
8372 if (RAW == '=') {
8373 NEXT;
8374 SKIP_BLANKS;
8375 val = xmlParseAttValue(ctxt);
8376 ctxt->instate = XML_PARSER_CONTENT;
8377 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008378 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02008379 "Specification mandates value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008380 return(NULL);
8381 }
8382
8383 /*
8384 * Check that xml:lang conforms to the specification
8385 * No more registered as an error, just generate a warning now
8386 * since this was deprecated in XML second edition
8387 */
8388 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8389 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008390 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8391 "Malformed value for xml:lang : %s\n",
8392 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008393 }
8394 }
8395
8396 /*
8397 * Check that xml:space conforms to the specification
8398 */
8399 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8400 if (xmlStrEqual(val, BAD_CAST "default"))
8401 *(ctxt->space) = 0;
8402 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8403 *(ctxt->space) = 1;
8404 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008405 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008406"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008407 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008408 }
8409 }
8410
8411 *value = val;
8412 return(name);
8413}
8414
8415/**
8416 * xmlParseStartTag:
8417 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008418 *
Owen Taylor3473f882001-02-23 17:55:21 +00008419 * parse a start of tag either for rule element or
8420 * EmptyElement. In both case we don't parse the tag closing chars.
8421 *
8422 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8423 *
8424 * [ WFC: Unique Att Spec ]
8425 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008426 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008427 *
8428 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8429 *
8430 * [ WFC: Unique Att Spec ]
8431 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008432 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008433 *
8434 * With namespace:
8435 *
8436 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8437 *
8438 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8439 *
8440 * Returns the element name parsed
8441 */
8442
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008443const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008444xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008445 const xmlChar *name;
8446 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008447 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008448 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008449 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008450 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008451 int i;
8452
8453 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008454 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008455
8456 name = xmlParseName(ctxt);
8457 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008458 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008459 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008460 return(NULL);
8461 }
8462
8463 /*
8464 * Now parse the attributes, it ends up with the ending
8465 *
8466 * (S Attribute)* S?
8467 */
8468 SKIP_BLANKS;
8469 GROW;
8470
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008471 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008472 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008473 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008474 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008475 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008476
8477 attname = xmlParseAttribute(ctxt, &attvalue);
8478 if ((attname != NULL) && (attvalue != NULL)) {
8479 /*
8480 * [ WFC: Unique Att Spec ]
8481 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008482 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008483 */
8484 for (i = 0; i < nbatts;i += 2) {
8485 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008486 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008487 xmlFree(attvalue);
8488 goto failed;
8489 }
8490 }
Owen Taylor3473f882001-02-23 17:55:21 +00008491 /*
8492 * Add the pair to atts
8493 */
8494 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008495 maxatts = 22; /* allow for 10 attrs by default */
8496 atts = (const xmlChar **)
8497 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008498 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008499 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008500 if (attvalue != NULL)
8501 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008502 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008503 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008504 ctxt->atts = atts;
8505 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008506 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008507 const xmlChar **n;
8508
Owen Taylor3473f882001-02-23 17:55:21 +00008509 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008510 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008511 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008512 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008513 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008514 if (attvalue != NULL)
8515 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008516 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008517 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008518 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008519 ctxt->atts = atts;
8520 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008521 }
8522 atts[nbatts++] = attname;
8523 atts[nbatts++] = attvalue;
8524 atts[nbatts] = NULL;
8525 atts[nbatts + 1] = NULL;
8526 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008527 if (attvalue != NULL)
8528 xmlFree(attvalue);
8529 }
8530
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008531failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008532
Daniel Veillard3772de32002-12-17 10:31:45 +00008533 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008534 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8535 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02008536 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008537 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8538 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008539 }
Daniel Veillard02111c12003-02-24 19:14:52 +00008540 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8541 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008542 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8543 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008544 break;
8545 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008546 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008547 GROW;
8548 }
8549
8550 /*
8551 * SAX: Start of Element !
8552 */
8553 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008554 (!ctxt->disableSAX)) {
8555 if (nbatts > 0)
8556 ctxt->sax->startElement(ctxt->userData, name, atts);
8557 else
8558 ctxt->sax->startElement(ctxt->userData, name, NULL);
8559 }
Owen Taylor3473f882001-02-23 17:55:21 +00008560
8561 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008562 /* Free only the content strings */
8563 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008564 if (atts[i] != NULL)
8565 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008566 }
8567 return(name);
8568}
8569
8570/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008571 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008572 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008573 * @line: line of the start tag
8574 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008575 *
8576 * parse an end of tag
8577 *
8578 * [42] ETag ::= '</' Name S? '>'
8579 *
8580 * With namespace
8581 *
8582 * [NS 9] ETag ::= '</' QName S? '>'
8583 */
8584
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008585static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008586xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008587 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008588
8589 GROW;
8590 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008591 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008592 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008593 return;
8594 }
8595 SKIP(2);
8596
Daniel Veillard46de64e2002-05-29 08:21:33 +00008597 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008598
8599 /*
8600 * We should definitely be at the ending "S? '>'" part
8601 */
8602 GROW;
8603 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008604 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008605 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008606 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008607 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008608
8609 /*
8610 * [ WFC: Element Type Match ]
8611 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008612 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008613 *
8614 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008615 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008616 if (name == NULL) name = BAD_CAST "unparseable";
8617 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008618 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008619 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008620 }
8621
8622 /*
8623 * SAX: End of Tag
8624 */
8625 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8626 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008627 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008628
Daniel Veillarde57ec792003-09-10 10:50:59 +00008629 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008630 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008631 return;
8632}
8633
8634/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008635 * xmlParseEndTag:
8636 * @ctxt: an XML parser context
8637 *
8638 * parse an end of tag
8639 *
8640 * [42] ETag ::= '</' Name S? '>'
8641 *
8642 * With namespace
8643 *
8644 * [NS 9] ETag ::= '</' QName S? '>'
8645 */
8646
8647void
8648xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008649 xmlParseEndTag1(ctxt, 0);
8650}
Daniel Veillard81273902003-09-30 00:43:48 +00008651#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008652
8653/************************************************************************
8654 * *
8655 * SAX 2 specific operations *
8656 * *
8657 ************************************************************************/
8658
Daniel Veillard0fb18932003-09-07 09:14:37 +00008659/*
8660 * xmlGetNamespace:
8661 * @ctxt: an XML parser context
8662 * @prefix: the prefix to lookup
8663 *
8664 * Lookup the namespace name for the @prefix (which ca be NULL)
Jan Pokornýbb654fe2016-04-13 16:56:07 +02008665 * The prefix must come from the @ctxt->dict dictionary
Daniel Veillard0fb18932003-09-07 09:14:37 +00008666 *
8667 * Returns the namespace name or NULL if not bound
8668 */
8669static const xmlChar *
8670xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8671 int i;
8672
Daniel Veillarde57ec792003-09-10 10:50:59 +00008673 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008674 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008675 if (ctxt->nsTab[i] == prefix) {
8676 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8677 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008678 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008679 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008680 return(NULL);
8681}
8682
8683/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008684 * xmlParseQName:
8685 * @ctxt: an XML parser context
8686 * @prefix: pointer to store the prefix part
8687 *
8688 * parse an XML Namespace QName
8689 *
8690 * [6] QName ::= (Prefix ':')? LocalPart
8691 * [7] Prefix ::= NCName
8692 * [8] LocalPart ::= NCName
8693 *
8694 * Returns the Name parsed or NULL
8695 */
8696
8697static const xmlChar *
8698xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8699 const xmlChar *l, *p;
8700
8701 GROW;
8702
8703 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008704 if (l == NULL) {
8705 if (CUR == ':') {
8706 l = xmlParseName(ctxt);
8707 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008708 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008709 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008710 *prefix = NULL;
8711 return(l);
8712 }
8713 }
8714 return(NULL);
8715 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008716 if (CUR == ':') {
8717 NEXT;
8718 p = l;
8719 l = xmlParseNCName(ctxt);
8720 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008721 xmlChar *tmp;
8722
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008723 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8724 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008725 l = xmlParseNmtoken(ctxt);
8726 if (l == NULL)
8727 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8728 else {
8729 tmp = xmlBuildQName(l, p, NULL, 0);
8730 xmlFree((char *)l);
8731 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008732 p = xmlDictLookup(ctxt->dict, tmp, -1);
8733 if (tmp != NULL) xmlFree(tmp);
8734 *prefix = NULL;
8735 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008736 }
8737 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008738 xmlChar *tmp;
8739
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008740 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8741 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008742 NEXT;
8743 tmp = (xmlChar *) xmlParseName(ctxt);
8744 if (tmp != NULL) {
8745 tmp = xmlBuildQName(tmp, l, NULL, 0);
8746 l = xmlDictLookup(ctxt->dict, tmp, -1);
8747 if (tmp != NULL) xmlFree(tmp);
8748 *prefix = p;
8749 return(l);
8750 }
8751 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8752 l = xmlDictLookup(ctxt->dict, tmp, -1);
8753 if (tmp != NULL) xmlFree(tmp);
8754 *prefix = p;
8755 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008756 }
8757 *prefix = p;
8758 } else
8759 *prefix = NULL;
8760 return(l);
8761}
8762
8763/**
8764 * xmlParseQNameAndCompare:
8765 * @ctxt: an XML parser context
8766 * @name: the localname
8767 * @prefix: the prefix, if any.
8768 *
8769 * parse an XML name and compares for match
8770 * (specialized for endtag parsing)
8771 *
8772 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8773 * and the name for mismatch
8774 */
8775
8776static const xmlChar *
8777xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8778 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008779 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008780 const xmlChar *in;
8781 const xmlChar *ret;
8782 const xmlChar *prefix2;
8783
8784 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8785
8786 GROW;
8787 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008788
Daniel Veillard0fb18932003-09-07 09:14:37 +00008789 cmp = prefix;
8790 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008791 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008792 ++cmp;
8793 }
8794 if ((*cmp == 0) && (*in == ':')) {
8795 in++;
8796 cmp = name;
8797 while (*in != 0 && *in == *cmp) {
8798 ++in;
8799 ++cmp;
8800 }
William M. Brack76e95df2003-10-18 16:20:14 +00008801 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008802 /* success */
8803 ctxt->input->cur = in;
8804 return((const xmlChar*) 1);
8805 }
8806 }
8807 /*
8808 * all strings coms from the dictionary, equality can be done directly
8809 */
8810 ret = xmlParseQName (ctxt, &prefix2);
8811 if ((ret == name) && (prefix == prefix2))
8812 return((const xmlChar*) 1);
8813 return ret;
8814}
8815
8816/**
8817 * xmlParseAttValueInternal:
8818 * @ctxt: an XML parser context
8819 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008820 * @alloc: whether the attribute was reallocated as a new string
8821 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008822 *
8823 * parse a value for an attribute.
8824 * NOTE: if no normalization is needed, the routine will return pointers
8825 * directly from the data buffer.
8826 *
8827 * 3.3.3 Attribute-Value Normalization:
8828 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008829 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008830 * - a character reference is processed by appending the referenced
8831 * character to the attribute value
8832 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008833 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008834 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8835 * appending #x20 to the normalized value, except that only a single
8836 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008837 * parsed entity or the literal entity value of an internal parsed entity
8838 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008839 * If the declared value is not CDATA, then the XML processor must further
8840 * process the normalized attribute value by discarding any leading and
8841 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008842 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008843 * All attributes for which no declaration has been read should be treated
8844 * by a non-validating parser as if declared CDATA.
8845 *
8846 * Returns the AttValue parsed or NULL. The value has to be freed by the
8847 * caller if it was copied, this can be detected by val[*len] == 0.
8848 */
8849
8850static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008851xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8852 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008853{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008854 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008855 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008856 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08008857 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008858
8859 GROW;
8860 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08008861 line = ctxt->input->line;
8862 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008863 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008864 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008865 return (NULL);
8866 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008867 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008868
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008869 /*
8870 * try to handle in this routine the most common case where no
8871 * allocation of a new string is required and where content is
8872 * pure ASCII.
8873 */
8874 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008875 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008876 end = ctxt->input->end;
8877 start = in;
8878 if (in >= end) {
8879 const xmlChar *oldbase = ctxt->input->base;
8880 GROW;
8881 if (oldbase != ctxt->input->base) {
8882 long delta = ctxt->input->base - oldbase;
8883 start = start + delta;
8884 in = in + delta;
8885 }
8886 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008887 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008888 if (normalize) {
8889 /*
8890 * Skip any leading spaces
8891 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008892 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008893 ((*in == 0x20) || (*in == 0x9) ||
8894 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008895 if (*in == 0xA) {
8896 line++; col = 1;
8897 } else {
8898 col++;
8899 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008900 in++;
8901 start = in;
8902 if (in >= end) {
8903 const xmlChar *oldbase = ctxt->input->base;
8904 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008905 if (ctxt->instate == XML_PARSER_EOF)
8906 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008907 if (oldbase != ctxt->input->base) {
8908 long delta = ctxt->input->base - oldbase;
8909 start = start + delta;
8910 in = in + delta;
8911 }
8912 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008913 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8914 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8915 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008916 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008917 return(NULL);
8918 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008919 }
8920 }
8921 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8922 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008923 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008924 if ((*in++ == 0x20) && (*in == 0x20)) break;
8925 if (in >= end) {
8926 const xmlChar *oldbase = ctxt->input->base;
8927 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008928 if (ctxt->instate == XML_PARSER_EOF)
8929 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008930 if (oldbase != ctxt->input->base) {
8931 long delta = ctxt->input->base - oldbase;
8932 start = start + delta;
8933 in = in + delta;
8934 }
8935 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008936 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8937 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8938 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008939 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008940 return(NULL);
8941 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008942 }
8943 }
8944 last = in;
8945 /*
8946 * skip the trailing blanks
8947 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008948 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008949 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008950 ((*in == 0x20) || (*in == 0x9) ||
8951 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008952 if (*in == 0xA) {
8953 line++, col = 1;
8954 } else {
8955 col++;
8956 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008957 in++;
8958 if (in >= end) {
8959 const xmlChar *oldbase = ctxt->input->base;
8960 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008961 if (ctxt->instate == XML_PARSER_EOF)
8962 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008963 if (oldbase != ctxt->input->base) {
8964 long delta = ctxt->input->base - oldbase;
8965 start = start + delta;
8966 in = in + delta;
8967 last = last + delta;
8968 }
8969 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008970 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8971 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8972 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008973 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008974 return(NULL);
8975 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008976 }
8977 }
Daniel Veillarde17db992012-07-19 11:25:16 +08008978 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8979 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8980 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008981 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008982 return(NULL);
8983 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008984 if (*in != limit) goto need_complex;
8985 } else {
8986 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8987 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8988 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008989 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008990 if (in >= end) {
8991 const xmlChar *oldbase = ctxt->input->base;
8992 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008993 if (ctxt->instate == XML_PARSER_EOF)
8994 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008995 if (oldbase != ctxt->input->base) {
8996 long delta = ctxt->input->base - oldbase;
8997 start = start + delta;
8998 in = in + delta;
8999 }
9000 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009001 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9002 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9003 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009004 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009005 return(NULL);
9006 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009007 }
9008 }
9009 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009010 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9011 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9012 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009013 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009014 return(NULL);
9015 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009016 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009017 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009018 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009019 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009020 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009021 *len = last - start;
9022 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009023 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009024 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009025 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009026 }
9027 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009028 ctxt->input->line = line;
9029 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009030 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009031 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009032need_complex:
9033 if (alloc) *alloc = 1;
9034 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009035}
9036
9037/**
9038 * xmlParseAttribute2:
9039 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009040 * @pref: the element prefix
9041 * @elem: the element name
9042 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009043 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009044 * @len: an int * to save the length of the attribute
9045 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009046 *
9047 * parse an attribute in the new SAX2 framework.
9048 *
9049 * Returns the attribute name, and the value in *value, .
9050 */
9051
9052static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009053xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009054 const xmlChar * pref, const xmlChar * elem,
9055 const xmlChar ** prefix, xmlChar ** value,
9056 int *len, int *alloc)
9057{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009058 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009059 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009060 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009061
9062 *value = NULL;
9063 GROW;
9064 name = xmlParseQName(ctxt, prefix);
9065 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009066 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9067 "error parsing attribute name\n");
9068 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009069 }
9070
9071 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009072 * get the type if needed
9073 */
9074 if (ctxt->attsSpecial != NULL) {
9075 int type;
9076
9077 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009078 pref, elem, *prefix, name);
9079 if (type != 0)
9080 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009081 }
9082
9083 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009084 * read the value
9085 */
9086 SKIP_BLANKS;
9087 if (RAW == '=') {
9088 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009089 SKIP_BLANKS;
9090 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9091 if (normalize) {
9092 /*
9093 * Sometimes a second normalisation pass for spaces is needed
9094 * but that only happens if charrefs or entities refernces
9095 * have been used in the attribute value, i.e. the attribute
9096 * value have been extracted in an allocated string already.
9097 */
9098 if (*alloc) {
9099 const xmlChar *val2;
9100
9101 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009102 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009103 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009104 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009105 }
9106 }
9107 }
9108 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009109 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009110 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02009111 "Specification mandates value for attribute %s\n",
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009112 name);
9113 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009114 }
9115
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009116 if (*prefix == ctxt->str_xml) {
9117 /*
9118 * Check that xml:lang conforms to the specification
9119 * No more registered as an error, just generate a warning now
9120 * since this was deprecated in XML second edition
9121 */
9122 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9123 internal_val = xmlStrndup(val, *len);
9124 if (!xmlCheckLanguageID(internal_val)) {
9125 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9126 "Malformed value for xml:lang : %s\n",
9127 internal_val, NULL);
9128 }
9129 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009130
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009131 /*
9132 * Check that xml:space conforms to the specification
9133 */
9134 if (xmlStrEqual(name, BAD_CAST "space")) {
9135 internal_val = xmlStrndup(val, *len);
9136 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9137 *(ctxt->space) = 0;
9138 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9139 *(ctxt->space) = 1;
9140 else {
9141 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9142 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9143 internal_val, NULL);
9144 }
9145 }
9146 if (internal_val) {
9147 xmlFree(internal_val);
9148 }
9149 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009150
9151 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009152 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009153}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009154/**
9155 * xmlParseStartTag2:
9156 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009157 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009158 * parse a start of tag either for rule element or
9159 * EmptyElement. In both case we don't parse the tag closing chars.
9160 * This routine is called when running SAX2 parsing
9161 *
9162 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9163 *
9164 * [ WFC: Unique Att Spec ]
9165 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009166 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009167 *
9168 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9169 *
9170 * [ WFC: Unique Att Spec ]
9171 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009172 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009173 *
9174 * With namespace:
9175 *
9176 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9177 *
9178 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9179 *
9180 * Returns the element name parsed
9181 */
9182
9183static const xmlChar *
9184xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009185 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009186 const xmlChar *localname;
9187 const xmlChar *prefix;
9188 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009189 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009190 const xmlChar *nsname;
9191 xmlChar *attvalue;
9192 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009193 int maxatts = ctxt->maxatts;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009194 int nratts, nbatts, nbdef, inputid;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009195 int i, j, nbNs, attval;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009196 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009197 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009198
9199 if (RAW != '<') return(NULL);
9200 NEXT1;
9201
9202 /*
9203 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9204 * point since the attribute values may be stored as pointers to
9205 * the buffer and calling SHRINK would destroy them !
9206 * The Shrinking is only possible once the full set of attribute
9207 * callbacks have been done.
9208 */
9209 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009210 cur = ctxt->input->cur - ctxt->input->base;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009211 inputid = ctxt->input->id;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009212 nbatts = 0;
9213 nratts = 0;
9214 nbdef = 0;
9215 nbNs = 0;
9216 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009217 /* Forget any namespaces added during an earlier parse of this element. */
9218 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009219
9220 localname = xmlParseQName(ctxt, &prefix);
9221 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009222 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9223 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009224 return(NULL);
9225 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009226 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009227
9228 /*
9229 * Now parse the attributes, it ends up with the ending
9230 *
9231 * (S Attribute)* S?
9232 */
9233 SKIP_BLANKS;
9234 GROW;
9235
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009236 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009237 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009238 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009239 const xmlChar *q = CUR_PTR;
9240 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009241 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009242
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009243 attname = xmlParseAttribute2(ctxt, prefix, localname,
9244 &aprefix, &attvalue, &len, &alloc);
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009245 if ((attname == NULL) || (attvalue == NULL))
9246 goto next_attr;
9247 if (len < 0) len = xmlStrlen(attvalue);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009248
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009249 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9250 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9251 xmlURIPtr uri;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009252
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009253 if (URL == NULL) {
9254 xmlErrMemory(ctxt, "dictionary allocation failure");
9255 if ((attvalue != NULL) && (alloc != 0))
9256 xmlFree(attvalue);
9257 return(NULL);
9258 }
9259 if (*URL != 0) {
9260 uri = xmlParseURI((const char *) URL);
9261 if (uri == NULL) {
9262 xmlNsErr(ctxt, XML_WAR_NS_URI,
9263 "xmlns: '%s' is not a valid URI\n",
9264 URL, NULL, NULL);
9265 } else {
9266 if (uri->scheme == NULL) {
9267 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9268 "xmlns: URI %s is not absolute\n",
9269 URL, NULL, NULL);
9270 }
9271 xmlFreeURI(uri);
9272 }
Daniel Veillard37334572008-07-31 08:20:02 +00009273 if (URL == ctxt->str_xml_ns) {
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009274 if (attname != ctxt->str_xml) {
9275 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9276 "xml namespace URI cannot be the default namespace\n",
9277 NULL, NULL, NULL);
9278 }
9279 goto next_attr;
9280 }
9281 if ((len == 29) &&
9282 (xmlStrEqual(URL,
9283 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9284 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9285 "reuse of the xmlns namespace name is forbidden\n",
9286 NULL, NULL, NULL);
9287 goto next_attr;
9288 }
9289 }
9290 /*
9291 * check that it's not a defined namespace
9292 */
9293 for (j = 1;j <= nbNs;j++)
9294 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9295 break;
9296 if (j <= nbNs)
9297 xmlErrAttributeDup(ctxt, NULL, attname);
9298 else
9299 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009300
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009301 } else if (aprefix == ctxt->str_xmlns) {
9302 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9303 xmlURIPtr uri;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009304
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009305 if (attname == ctxt->str_xml) {
9306 if (URL != ctxt->str_xml_ns) {
9307 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9308 "xml namespace prefix mapped to wrong URI\n",
9309 NULL, NULL, NULL);
9310 }
9311 /*
9312 * Do not keep a namespace definition node
9313 */
9314 goto next_attr;
9315 }
9316 if (URL == ctxt->str_xml_ns) {
9317 if (attname != ctxt->str_xml) {
9318 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9319 "xml namespace URI mapped to wrong prefix\n",
9320 NULL, NULL, NULL);
9321 }
9322 goto next_attr;
9323 }
9324 if (attname == ctxt->str_xmlns) {
9325 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9326 "redefinition of the xmlns prefix is forbidden\n",
9327 NULL, NULL, NULL);
9328 goto next_attr;
9329 }
9330 if ((len == 29) &&
9331 (xmlStrEqual(URL,
9332 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9333 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9334 "reuse of the xmlns namespace name is forbidden\n",
9335 NULL, NULL, NULL);
9336 goto next_attr;
9337 }
9338 if ((URL == NULL) || (URL[0] == 0)) {
9339 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9340 "xmlns:%s: Empty XML namespace is not allowed\n",
9341 attname, NULL, NULL);
9342 goto next_attr;
9343 } else {
9344 uri = xmlParseURI((const char *) URL);
9345 if (uri == NULL) {
9346 xmlNsErr(ctxt, XML_WAR_NS_URI,
9347 "xmlns:%s: '%s' is not a valid URI\n",
9348 attname, URL, NULL);
9349 } else {
9350 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9351 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9352 "xmlns:%s: URI %s is not absolute\n",
9353 attname, URL, NULL);
9354 }
9355 xmlFreeURI(uri);
9356 }
9357 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009358
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009359 /*
9360 * check that it's not a defined namespace
9361 */
9362 for (j = 1;j <= nbNs;j++)
9363 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9364 break;
9365 if (j <= nbNs)
9366 xmlErrAttributeDup(ctxt, aprefix, attname);
9367 else
9368 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9369
9370 } else {
9371 /*
9372 * Add the pair to atts
9373 */
9374 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9375 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9376 goto next_attr;
9377 }
9378 maxatts = ctxt->maxatts;
9379 atts = ctxt->atts;
9380 }
9381 ctxt->attallocs[nratts++] = alloc;
9382 atts[nbatts++] = attname;
9383 atts[nbatts++] = aprefix;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009384 /*
9385 * The namespace URI field is used temporarily to point at the
9386 * base of the current input buffer for non-alloced attributes.
9387 * When the input buffer is reallocated, all the pointers become
9388 * invalid, but they can be reconstructed later.
9389 */
9390 if (alloc)
9391 atts[nbatts++] = NULL;
9392 else
9393 atts[nbatts++] = ctxt->input->base;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009394 atts[nbatts++] = attvalue;
9395 attvalue += len;
9396 atts[nbatts++] = attvalue;
9397 /*
9398 * tag if some deallocation is needed
9399 */
9400 if (alloc != 0) attval = 1;
9401 attvalue = NULL; /* moved into atts */
9402 }
9403
9404next_attr:
9405 if ((attvalue != NULL) && (alloc != 0)) {
9406 xmlFree(attvalue);
9407 attvalue = NULL;
9408 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009409
9410 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009411 if (ctxt->instate == XML_PARSER_EOF)
9412 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009413 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9414 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02009415 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009416 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9417 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009418 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009419 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009420 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9421 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009422 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009423 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009424 break;
9425 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009426 GROW;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009427 }
9428
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009429 if (ctxt->input->id != inputid) {
9430 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9431 "Unexpected change of input\n");
9432 localname = NULL;
9433 goto done;
9434 }
9435
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009436 /* Reconstruct attribute value pointers. */
9437 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9438 if (atts[i+2] != NULL) {
9439 /*
9440 * Arithmetic on dangling pointers is technically undefined
9441 * behavior, but well...
9442 */
9443 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9444 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9445 atts[i+3] += offset; /* value */
9446 atts[i+4] += offset; /* valuend */
9447 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009448 }
9449
Daniel Veillard0fb18932003-09-07 09:14:37 +00009450 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009451 * The attributes defaulting
9452 */
9453 if (ctxt->attsDefault != NULL) {
9454 xmlDefAttrsPtr defaults;
9455
9456 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9457 if (defaults != NULL) {
9458 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009459 attname = defaults->values[5 * i];
9460 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009461
9462 /*
9463 * special work for namespaces defaulted defs
9464 */
9465 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9466 /*
9467 * check that it's not a defined namespace
9468 */
9469 for (j = 1;j <= nbNs;j++)
9470 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9471 break;
9472 if (j <= nbNs) continue;
9473
9474 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009475 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009476 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009477 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009478 nbNs++;
9479 }
9480 } else if (aprefix == ctxt->str_xmlns) {
9481 /*
9482 * check that it's not a defined namespace
9483 */
9484 for (j = 1;j <= nbNs;j++)
9485 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9486 break;
9487 if (j <= nbNs) continue;
9488
9489 nsname = xmlGetNamespace(ctxt, attname);
9490 if (nsname != defaults->values[2]) {
9491 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009492 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009493 nbNs++;
9494 }
9495 } else {
9496 /*
9497 * check that it's not a defined attribute
9498 */
9499 for (j = 0;j < nbatts;j+=5) {
9500 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9501 break;
9502 }
9503 if (j < nbatts) continue;
9504
9505 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9506 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009507 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009508 }
9509 maxatts = ctxt->maxatts;
9510 atts = ctxt->atts;
9511 }
9512 atts[nbatts++] = attname;
9513 atts[nbatts++] = aprefix;
9514 if (aprefix == NULL)
9515 atts[nbatts++] = NULL;
9516 else
9517 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009518 atts[nbatts++] = defaults->values[5 * i + 2];
9519 atts[nbatts++] = defaults->values[5 * i + 3];
9520 if ((ctxt->standalone == 1) &&
9521 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009522 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009523 "standalone: attribute %s on %s defaulted from external subset\n",
9524 attname, localname);
9525 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009526 nbdef++;
9527 }
9528 }
9529 }
9530 }
9531
Daniel Veillarde70c8772003-11-25 07:21:18 +00009532 /*
9533 * The attributes checkings
9534 */
9535 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009536 /*
9537 * The default namespace does not apply to attribute names.
9538 */
9539 if (atts[i + 1] != NULL) {
9540 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9541 if (nsname == NULL) {
9542 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9543 "Namespace prefix %s for %s on %s is not defined\n",
9544 atts[i + 1], atts[i], localname);
9545 }
9546 atts[i + 2] = nsname;
9547 } else
9548 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009549 /*
9550 * [ WFC: Unique Att Spec ]
9551 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009552 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009553 * As extended by the Namespace in XML REC.
9554 */
9555 for (j = 0; j < i;j += 5) {
9556 if (atts[i] == atts[j]) {
9557 if (atts[i+1] == atts[j+1]) {
9558 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9559 break;
9560 }
9561 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9562 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9563 "Namespaced Attribute %s in '%s' redefined\n",
9564 atts[i], nsname, NULL);
9565 break;
9566 }
9567 }
9568 }
9569 }
9570
Daniel Veillarde57ec792003-09-10 10:50:59 +00009571 nsname = xmlGetNamespace(ctxt, prefix);
9572 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009573 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9574 "Namespace prefix %s on %s is not defined\n",
9575 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009576 }
9577 *pref = prefix;
9578 *URI = nsname;
9579
9580 /*
9581 * SAX: Start of Element !
9582 */
9583 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9584 (!ctxt->disableSAX)) {
9585 if (nbNs > 0)
9586 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9587 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9588 nbatts / 5, nbdef, atts);
9589 else
9590 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9591 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9592 }
9593
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009594done:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009595 /*
9596 * Free up attribute allocated strings if needed
9597 */
9598 if (attval != 0) {
9599 for (i = 3,j = 0; j < nratts;i += 5,j++)
9600 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9601 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009602 }
9603
9604 return(localname);
9605}
9606
9607/**
9608 * xmlParseEndTag2:
9609 * @ctxt: an XML parser context
9610 * @line: line of the start tag
9611 * @nsNr: number of namespaces on the start tag
9612 *
9613 * parse an end of tag
9614 *
9615 * [42] ETag ::= '</' Name S? '>'
9616 *
9617 * With namespace
9618 *
9619 * [NS 9] ETag ::= '</' QName S? '>'
9620 */
9621
9622static void
9623xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009624 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009625 const xmlChar *name;
David Kilzerdb07dd62016-02-12 09:58:29 -08009626 size_t curLength;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009627
9628 GROW;
9629 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009630 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009631 return;
9632 }
9633 SKIP(2);
9634
David Kilzerdb07dd62016-02-12 09:58:29 -08009635 curLength = ctxt->input->end - ctxt->input->cur;
9636 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9637 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9638 if ((curLength >= (size_t)(tlen + 1)) &&
9639 (ctxt->input->cur[tlen] == '>')) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009640 ctxt->input->cur += tlen + 1;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009641 ctxt->input->col += tlen + 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009642 goto done;
9643 }
9644 ctxt->input->cur += tlen;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009645 ctxt->input->col += tlen;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009646 name = (xmlChar*)1;
9647 } else {
9648 if (prefix == NULL)
9649 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9650 else
9651 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9652 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009653
9654 /*
9655 * We should definitely be at the ending "S? '>'" part
9656 */
9657 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009658 if (ctxt->instate == XML_PARSER_EOF)
9659 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009660 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009661 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009662 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009663 } else
9664 NEXT1;
9665
9666 /*
9667 * [ WFC: Element Type Match ]
9668 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009669 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009670 *
9671 */
9672 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009673 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009674 if ((line == 0) && (ctxt->node != NULL))
9675 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009676 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009677 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009678 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009679 }
9680
9681 /*
9682 * SAX: End of Tag
9683 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009684done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009685 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9686 (!ctxt->disableSAX))
9687 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9688
Daniel Veillard0fb18932003-09-07 09:14:37 +00009689 spacePop(ctxt);
9690 if (nsNr != 0)
9691 nsPop(ctxt, nsNr);
9692 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009693}
9694
9695/**
Owen Taylor3473f882001-02-23 17:55:21 +00009696 * xmlParseCDSect:
9697 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009698 *
Owen Taylor3473f882001-02-23 17:55:21 +00009699 * Parse escaped pure raw content.
9700 *
9701 * [18] CDSect ::= CDStart CData CDEnd
9702 *
9703 * [19] CDStart ::= '<![CDATA['
9704 *
9705 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9706 *
9707 * [21] CDEnd ::= ']]>'
9708 */
9709void
9710xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9711 xmlChar *buf = NULL;
9712 int len = 0;
9713 int size = XML_PARSER_BUFFER_SIZE;
9714 int r, rl;
9715 int s, sl;
9716 int cur, l;
9717 int count = 0;
9718
Daniel Veillard8f597c32003-10-06 08:19:27 +00009719 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009720 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009721 SKIP(9);
9722 } else
9723 return;
9724
9725 ctxt->instate = XML_PARSER_CDATA_SECTION;
9726 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009727 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009728 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009729 ctxt->instate = XML_PARSER_CONTENT;
9730 return;
9731 }
9732 NEXTL(rl);
9733 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009734 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009735 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009736 ctxt->instate = XML_PARSER_CONTENT;
9737 return;
9738 }
9739 NEXTL(sl);
9740 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009741 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009742 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009743 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009744 return;
9745 }
William M. Brack871611b2003-10-18 04:53:14 +00009746 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009747 ((r != ']') || (s != ']') || (cur != '>'))) {
9748 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009749 xmlChar *tmp;
9750
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009751 if ((size > XML_MAX_TEXT_LENGTH) &&
9752 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9753 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9754 "CData section too big found", NULL);
9755 xmlFree (buf);
9756 return;
9757 }
9758 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009759 if (tmp == NULL) {
9760 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009761 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009762 return;
9763 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009764 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009765 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009766 }
9767 COPY_BUF(rl,buf,len,r);
9768 r = s;
9769 rl = sl;
9770 s = cur;
9771 sl = l;
9772 count++;
9773 if (count > 50) {
9774 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009775 if (ctxt->instate == XML_PARSER_EOF) {
9776 xmlFree(buf);
9777 return;
9778 }
Owen Taylor3473f882001-02-23 17:55:21 +00009779 count = 0;
9780 }
9781 NEXTL(l);
9782 cur = CUR_CHAR(l);
9783 }
9784 buf[len] = 0;
9785 ctxt->instate = XML_PARSER_CONTENT;
9786 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009787 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009788 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009789 xmlFree(buf);
9790 return;
9791 }
9792 NEXTL(l);
9793
9794 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009795 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009796 */
9797 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9798 if (ctxt->sax->cdataBlock != NULL)
9799 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009800 else if (ctxt->sax->characters != NULL)
9801 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009802 }
9803 xmlFree(buf);
9804}
9805
9806/**
9807 * xmlParseContent:
9808 * @ctxt: an XML parser context
9809 *
9810 * Parse a content:
9811 *
9812 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9813 */
9814
9815void
9816xmlParseContent(xmlParserCtxtPtr ctxt) {
9817 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009818 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009819 ((RAW != '<') || (NXT(1) != '/')) &&
9820 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009821 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009822 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009823 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009824
9825 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009826 * First case : a Processing Instruction.
9827 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009828 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009829 xmlParsePI(ctxt);
9830 }
9831
9832 /*
9833 * Second case : a CDSection
9834 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009835 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009836 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009837 xmlParseCDSect(ctxt);
9838 }
9839
9840 /*
9841 * Third case : a comment
9842 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009843 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009844 (NXT(2) == '-') && (NXT(3) == '-')) {
9845 xmlParseComment(ctxt);
9846 ctxt->instate = XML_PARSER_CONTENT;
9847 }
9848
9849 /*
9850 * Fourth case : a sub-element.
9851 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009852 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009853 xmlParseElement(ctxt);
9854 }
9855
9856 /*
9857 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009858 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009859 */
9860
Daniel Veillard21a0f912001-02-25 19:54:14 +00009861 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009862 xmlParseReference(ctxt);
9863 }
9864
9865 /*
9866 * Last case, text. Note that References are handled directly.
9867 */
9868 else {
9869 xmlParseCharData(ctxt, 0);
9870 }
9871
9872 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00009873 SHRINK;
9874
Daniel Veillardfdc91562002-07-01 21:52:03 +00009875 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009876 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9877 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +08009878 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009879 break;
9880 }
9881 }
9882}
9883
9884/**
9885 * xmlParseElement:
9886 * @ctxt: an XML parser context
9887 *
9888 * parse an XML element, this is highly recursive
9889 *
9890 * [39] element ::= EmptyElemTag | STag content ETag
9891 *
9892 * [ WFC: Element Type Match ]
9893 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009894 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009895 *
Owen Taylor3473f882001-02-23 17:55:21 +00009896 */
9897
9898void
9899xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009900 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009901 const xmlChar *prefix = NULL;
9902 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009903 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009904 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009905 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009906 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009907
Daniel Veillard8915c152008-08-26 13:05:34 +00009908 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9909 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9910 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9911 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9912 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08009913 xmlHaltParser(ctxt);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009914 return;
9915 }
9916
Owen Taylor3473f882001-02-23 17:55:21 +00009917 /* Capture start position */
9918 if (ctxt->record_info) {
9919 node_info.begin_pos = ctxt->input->consumed +
9920 (CUR_PTR - ctxt->input->base);
9921 node_info.begin_line = ctxt->input->line;
9922 }
9923
9924 if (ctxt->spaceNr == 0)
9925 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009926 else if (*ctxt->space == -2)
9927 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009928 else
9929 spacePush(ctxt, *ctxt->space);
9930
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009931 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009932#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009933 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009934#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009935 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009936#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009937 else
9938 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009939#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009940 if (ctxt->instate == XML_PARSER_EOF)
9941 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009942 if (name == NULL) {
9943 spacePop(ctxt);
9944 return;
9945 }
9946 namePush(ctxt, name);
9947 ret = ctxt->node;
9948
Daniel Veillard4432df22003-09-28 18:58:27 +00009949#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009950 /*
9951 * [ VC: Root Element Type ]
9952 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009953 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00009954 */
9955 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9956 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9957 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009958#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009959
9960 /*
9961 * Check for an Empty Element.
9962 */
9963 if ((RAW == '/') && (NXT(1) == '>')) {
9964 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009965 if (ctxt->sax2) {
9966 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9967 (!ctxt->disableSAX))
9968 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009969#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009970 } else {
9971 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9972 (!ctxt->disableSAX))
9973 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009974#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009975 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009976 namePop(ctxt);
9977 spacePop(ctxt);
9978 if (nsNr != ctxt->nsNr)
9979 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009980 if ( ret != NULL && ctxt->record_info ) {
9981 node_info.end_pos = ctxt->input->consumed +
9982 (CUR_PTR - ctxt->input->base);
9983 node_info.end_line = ctxt->input->line;
9984 node_info.node = ret;
9985 xmlParserAddNodeInfo(ctxt, &node_info);
9986 }
9987 return;
9988 }
9989 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009990 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009991 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009992 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9993 "Couldn't find end of Start Tag %s line %d\n",
9994 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009995
9996 /*
9997 * end of parsing of this node.
9998 */
9999 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010000 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010001 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010002 if (nsNr != ctxt->nsNr)
10003 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010004
10005 /*
10006 * Capture end position and add node
10007 */
10008 if ( ret != NULL && ctxt->record_info ) {
10009 node_info.end_pos = ctxt->input->consumed +
10010 (CUR_PTR - ctxt->input->base);
10011 node_info.end_line = ctxt->input->line;
10012 node_info.node = ret;
10013 xmlParserAddNodeInfo(ctxt, &node_info);
10014 }
10015 return;
10016 }
10017
10018 /*
10019 * Parse the content of the element:
10020 */
10021 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010022 if (ctxt->instate == XML_PARSER_EOF)
10023 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010024 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010025 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010026 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010027 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010028
10029 /*
10030 * end of parsing of this node.
10031 */
10032 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010033 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010034 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010035 if (nsNr != ctxt->nsNr)
10036 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010037 return;
10038 }
10039
10040 /*
10041 * parse the end of tag: '</' should be here.
10042 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010043 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010044 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010045 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010046 }
10047#ifdef LIBXML_SAX1_ENABLED
10048 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010049 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010050#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010051
10052 /*
10053 * Capture end position and add node
10054 */
10055 if ( ret != NULL && ctxt->record_info ) {
10056 node_info.end_pos = ctxt->input->consumed +
10057 (CUR_PTR - ctxt->input->base);
10058 node_info.end_line = ctxt->input->line;
10059 node_info.node = ret;
10060 xmlParserAddNodeInfo(ctxt, &node_info);
10061 }
10062}
10063
10064/**
10065 * xmlParseVersionNum:
10066 * @ctxt: an XML parser context
10067 *
10068 * parse the XML version value.
10069 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010070 * [26] VersionNum ::= '1.' [0-9]+
10071 *
10072 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010073 *
10074 * Returns the string giving the XML version number, or NULL
10075 */
10076xmlChar *
10077xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10078 xmlChar *buf = NULL;
10079 int len = 0;
10080 int size = 10;
10081 xmlChar cur;
10082
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010083 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010084 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010085 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010086 return(NULL);
10087 }
10088 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010089 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010090 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010091 return(NULL);
10092 }
10093 buf[len++] = cur;
10094 NEXT;
10095 cur=CUR;
10096 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010097 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010098 return(NULL);
10099 }
10100 buf[len++] = cur;
10101 NEXT;
10102 cur=CUR;
10103 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010104 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010105 xmlChar *tmp;
10106
Owen Taylor3473f882001-02-23 17:55:21 +000010107 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010108 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10109 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010110 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010111 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010112 return(NULL);
10113 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010114 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010115 }
10116 buf[len++] = cur;
10117 NEXT;
10118 cur=CUR;
10119 }
10120 buf[len] = 0;
10121 return(buf);
10122}
10123
10124/**
10125 * xmlParseVersionInfo:
10126 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010127 *
Owen Taylor3473f882001-02-23 17:55:21 +000010128 * parse the XML version.
10129 *
10130 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010131 *
Owen Taylor3473f882001-02-23 17:55:21 +000010132 * [25] Eq ::= S? '=' S?
10133 *
10134 * Returns the version string, e.g. "1.0"
10135 */
10136
10137xmlChar *
10138xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10139 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010140
Daniel Veillarda07050d2003-10-19 14:46:32 +000010141 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010142 SKIP(7);
10143 SKIP_BLANKS;
10144 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010145 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010146 return(NULL);
10147 }
10148 NEXT;
10149 SKIP_BLANKS;
10150 if (RAW == '"') {
10151 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010152 version = xmlParseVersionNum(ctxt);
10153 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010154 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010155 } else
10156 NEXT;
10157 } else if (RAW == '\''){
10158 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010159 version = xmlParseVersionNum(ctxt);
10160 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010161 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010162 } else
10163 NEXT;
10164 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010165 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010166 }
10167 }
10168 return(version);
10169}
10170
10171/**
10172 * xmlParseEncName:
10173 * @ctxt: an XML parser context
10174 *
10175 * parse the XML encoding name
10176 *
10177 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10178 *
10179 * Returns the encoding name value or NULL
10180 */
10181xmlChar *
10182xmlParseEncName(xmlParserCtxtPtr ctxt) {
10183 xmlChar *buf = NULL;
10184 int len = 0;
10185 int size = 10;
10186 xmlChar cur;
10187
10188 cur = CUR;
10189 if (((cur >= 'a') && (cur <= 'z')) ||
10190 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010191 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010192 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010193 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010194 return(NULL);
10195 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010196
Owen Taylor3473f882001-02-23 17:55:21 +000010197 buf[len++] = cur;
10198 NEXT;
10199 cur = CUR;
10200 while (((cur >= 'a') && (cur <= 'z')) ||
10201 ((cur >= 'A') && (cur <= 'Z')) ||
10202 ((cur >= '0') && (cur <= '9')) ||
10203 (cur == '.') || (cur == '_') ||
10204 (cur == '-')) {
10205 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010206 xmlChar *tmp;
10207
Owen Taylor3473f882001-02-23 17:55:21 +000010208 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010209 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10210 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010211 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010212 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010213 return(NULL);
10214 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010215 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010216 }
10217 buf[len++] = cur;
10218 NEXT;
10219 cur = CUR;
10220 if (cur == 0) {
10221 SHRINK;
10222 GROW;
10223 cur = CUR;
10224 }
10225 }
10226 buf[len] = 0;
10227 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010228 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010229 }
10230 return(buf);
10231}
10232
10233/**
10234 * xmlParseEncodingDecl:
10235 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010236 *
Owen Taylor3473f882001-02-23 17:55:21 +000010237 * parse the XML encoding declaration
10238 *
10239 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10240 *
10241 * this setups the conversion filters.
10242 *
10243 * Returns the encoding value or NULL
10244 */
10245
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010246const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010247xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10248 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010249
10250 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010251 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010252 SKIP(8);
10253 SKIP_BLANKS;
10254 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010255 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010256 return(NULL);
10257 }
10258 NEXT;
10259 SKIP_BLANKS;
10260 if (RAW == '"') {
10261 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010262 encoding = xmlParseEncName(ctxt);
10263 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010264 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010265 xmlFree((xmlChar *) encoding);
10266 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010267 } else
10268 NEXT;
10269 } else if (RAW == '\''){
10270 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010271 encoding = xmlParseEncName(ctxt);
10272 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010273 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010274 xmlFree((xmlChar *) encoding);
10275 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010276 } else
10277 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010278 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010279 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010280 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010281
10282 /*
10283 * Non standard parsing, allowing the user to ignore encoding
10284 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010285 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10286 xmlFree((xmlChar *) encoding);
10287 return(NULL);
10288 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010289
Daniel Veillard6b621b82003-08-11 15:03:34 +000010290 /*
10291 * UTF-16 encoding stwich has already taken place at this stage,
10292 * more over the little-endian/big-endian selection is already done
10293 */
10294 if ((encoding != NULL) &&
10295 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10296 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010297 /*
10298 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010299 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010300 * document is apparently UTF-8 compatible, then raise an
10301 * encoding mismatch fatal error
10302 */
10303 if ((ctxt->encoding == NULL) &&
10304 (ctxt->input->buf != NULL) &&
10305 (ctxt->input->buf->encoder == NULL)) {
10306 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10307 "Document labelled UTF-16 but has UTF-8 content\n");
10308 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010309 if (ctxt->encoding != NULL)
10310 xmlFree((xmlChar *) ctxt->encoding);
10311 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010312 }
10313 /*
10314 * UTF-8 encoding is handled natively
10315 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010316 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010317 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10318 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010319 if (ctxt->encoding != NULL)
10320 xmlFree((xmlChar *) ctxt->encoding);
10321 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010322 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010323 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010324 xmlCharEncodingHandlerPtr handler;
10325
10326 if (ctxt->input->encoding != NULL)
10327 xmlFree((xmlChar *) ctxt->input->encoding);
10328 ctxt->input->encoding = encoding;
10329
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010330 handler = xmlFindCharEncodingHandler((const char *) encoding);
10331 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010332 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10333 /* failed to convert */
10334 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10335 return(NULL);
10336 }
Owen Taylor3473f882001-02-23 17:55:21 +000010337 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010338 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010339 "Unsupported encoding %s\n", encoding);
10340 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010341 }
10342 }
10343 }
10344 return(encoding);
10345}
10346
10347/**
10348 * xmlParseSDDecl:
10349 * @ctxt: an XML parser context
10350 *
10351 * parse the XML standalone declaration
10352 *
10353 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010354 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010355 *
10356 * [ VC: Standalone Document Declaration ]
10357 * TODO The standalone document declaration must have the value "no"
10358 * if any external markup declarations contain declarations of:
10359 * - attributes with default values, if elements to which these
10360 * attributes apply appear in the document without specifications
10361 * of values for these attributes, or
10362 * - entities (other than amp, lt, gt, apos, quot), if references
10363 * to those entities appear in the document, or
10364 * - attributes with values subject to normalization, where the
10365 * attribute appears in the document with a value which will change
10366 * as a result of normalization, or
10367 * - element types with element content, if white space occurs directly
10368 * within any instance of those types.
10369 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010370 * Returns:
10371 * 1 if standalone="yes"
10372 * 0 if standalone="no"
10373 * -2 if standalone attribute is missing or invalid
10374 * (A standalone value of -2 means that the XML declaration was found,
10375 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010376 */
10377
10378int
10379xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010380 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010381
10382 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010383 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010384 SKIP(10);
10385 SKIP_BLANKS;
10386 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010387 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010388 return(standalone);
10389 }
10390 NEXT;
10391 SKIP_BLANKS;
10392 if (RAW == '\''){
10393 NEXT;
10394 if ((RAW == 'n') && (NXT(1) == 'o')) {
10395 standalone = 0;
10396 SKIP(2);
10397 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10398 (NXT(2) == 's')) {
10399 standalone = 1;
10400 SKIP(3);
10401 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010402 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010403 }
10404 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010405 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010406 } else
10407 NEXT;
10408 } else if (RAW == '"'){
10409 NEXT;
10410 if ((RAW == 'n') && (NXT(1) == 'o')) {
10411 standalone = 0;
10412 SKIP(2);
10413 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10414 (NXT(2) == 's')) {
10415 standalone = 1;
10416 SKIP(3);
10417 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010418 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010419 }
10420 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010421 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010422 } else
10423 NEXT;
10424 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010425 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010426 }
10427 }
10428 return(standalone);
10429}
10430
10431/**
10432 * xmlParseXMLDecl:
10433 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010434 *
Owen Taylor3473f882001-02-23 17:55:21 +000010435 * parse an XML declaration header
10436 *
10437 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10438 */
10439
10440void
10441xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10442 xmlChar *version;
10443
10444 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010445 * This value for standalone indicates that the document has an
10446 * XML declaration but it does not have a standalone attribute.
10447 * It will be overwritten later if a standalone attribute is found.
10448 */
10449 ctxt->input->standalone = -2;
10450
10451 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010452 * We know that '<?xml' is here.
10453 */
10454 SKIP(5);
10455
William M. Brack76e95df2003-10-18 16:20:14 +000010456 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010457 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10458 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010459 }
10460 SKIP_BLANKS;
10461
10462 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010463 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010464 */
10465 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010466 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010467 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010468 } else {
10469 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10470 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010471 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010472 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010473 if (ctxt->options & XML_PARSE_OLD10) {
10474 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10475 "Unsupported version '%s'\n",
10476 version);
10477 } else {
10478 if ((version[0] == '1') && ((version[1] == '.'))) {
10479 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10480 "Unsupported version '%s'\n",
10481 version, NULL);
10482 } else {
10483 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10484 "Unsupported version '%s'\n",
10485 version);
10486 }
10487 }
Daniel Veillard19840942001-11-29 16:11:38 +000010488 }
10489 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010490 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010491 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010492 }
Owen Taylor3473f882001-02-23 17:55:21 +000010493
10494 /*
10495 * We may have the encoding declaration
10496 */
William M. Brack76e95df2003-10-18 16:20:14 +000010497 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010498 if ((RAW == '?') && (NXT(1) == '>')) {
10499 SKIP(2);
10500 return;
10501 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010502 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010503 }
10504 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010505 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10506 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010507 /*
10508 * The XML REC instructs us to stop parsing right here
10509 */
10510 return;
10511 }
10512
10513 /*
10514 * We may have the standalone status.
10515 */
William M. Brack76e95df2003-10-18 16:20:14 +000010516 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010517 if ((RAW == '?') && (NXT(1) == '>')) {
10518 SKIP(2);
10519 return;
10520 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010521 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010522 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010523
10524 /*
10525 * We can grow the input buffer freely at that point
10526 */
10527 GROW;
10528
Owen Taylor3473f882001-02-23 17:55:21 +000010529 SKIP_BLANKS;
10530 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10531
10532 SKIP_BLANKS;
10533 if ((RAW == '?') && (NXT(1) == '>')) {
10534 SKIP(2);
10535 } else if (RAW == '>') {
10536 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010537 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010538 NEXT;
10539 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010540 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010541 MOVETO_ENDTAG(CUR_PTR);
10542 NEXT;
10543 }
10544}
10545
10546/**
10547 * xmlParseMisc:
10548 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010549 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010550 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010551 *
10552 * [27] Misc ::= Comment | PI | S
10553 */
10554
10555void
10556xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010557 while ((ctxt->instate != XML_PARSER_EOF) &&
10558 (((RAW == '<') && (NXT(1) == '?')) ||
10559 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10560 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010561 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010562 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010563 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010564 NEXT;
10565 } else
10566 xmlParseComment(ctxt);
10567 }
10568}
10569
10570/**
10571 * xmlParseDocument:
10572 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010573 *
Owen Taylor3473f882001-02-23 17:55:21 +000010574 * parse an XML document (and build a tree if using the standard SAX
10575 * interface).
10576 *
10577 * [1] document ::= prolog element Misc*
10578 *
10579 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10580 *
10581 * Returns 0, -1 in case of error. the parser context is augmented
10582 * as a result of the parsing.
10583 */
10584
10585int
10586xmlParseDocument(xmlParserCtxtPtr ctxt) {
10587 xmlChar start[4];
10588 xmlCharEncoding enc;
10589
10590 xmlInitParser();
10591
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010592 if ((ctxt == NULL) || (ctxt->input == NULL))
10593 return(-1);
10594
Owen Taylor3473f882001-02-23 17:55:21 +000010595 GROW;
10596
10597 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010598 * SAX: detecting the level.
10599 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010600 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010601
10602 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010603 * SAX: beginning of the document processing.
10604 */
10605 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10606 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010607 if (ctxt->instate == XML_PARSER_EOF)
10608 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010609
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010610 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010611 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010612 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010613 * Get the 4 first bytes and decode the charset
10614 * if enc != XML_CHAR_ENCODING_NONE
10615 * plug some encoding conversion routines.
10616 */
10617 start[0] = RAW;
10618 start[1] = NXT(1);
10619 start[2] = NXT(2);
10620 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010621 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010622 if (enc != XML_CHAR_ENCODING_NONE) {
10623 xmlSwitchEncoding(ctxt, enc);
10624 }
Owen Taylor3473f882001-02-23 17:55:21 +000010625 }
10626
10627
10628 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010629 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010630 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010631 }
10632
10633 /*
10634 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010635 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010636 * than just the first line, unless the amount of data is really
10637 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010638 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010639 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10640 GROW;
10641 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010642 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010643
10644 /*
10645 * Note that we will switch encoding on the fly.
10646 */
10647 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010648 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10649 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010650 /*
10651 * The XML REC instructs us to stop parsing right here
10652 */
10653 return(-1);
10654 }
10655 ctxt->standalone = ctxt->input->standalone;
10656 SKIP_BLANKS;
10657 } else {
10658 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10659 }
10660 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10661 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010662 if (ctxt->instate == XML_PARSER_EOF)
10663 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010664 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10665 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10666 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10667 }
Owen Taylor3473f882001-02-23 17:55:21 +000010668
10669 /*
10670 * The Misc part of the Prolog
10671 */
10672 GROW;
10673 xmlParseMisc(ctxt);
10674
10675 /*
10676 * Then possibly doc type declaration(s) and more Misc
10677 * (doctypedecl Misc*)?
10678 */
10679 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010680 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010681
10682 ctxt->inSubset = 1;
10683 xmlParseDocTypeDecl(ctxt);
10684 if (RAW == '[') {
10685 ctxt->instate = XML_PARSER_DTD;
10686 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010687 if (ctxt->instate == XML_PARSER_EOF)
10688 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010689 }
10690
10691 /*
10692 * Create and update the external subset.
10693 */
10694 ctxt->inSubset = 2;
10695 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10696 (!ctxt->disableSAX))
10697 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10698 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010699 if (ctxt->instate == XML_PARSER_EOF)
10700 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010701 ctxt->inSubset = 0;
10702
Daniel Veillardac4118d2008-01-11 05:27:32 +000010703 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010704
10705 ctxt->instate = XML_PARSER_PROLOG;
10706 xmlParseMisc(ctxt);
10707 }
10708
10709 /*
10710 * Time to start parsing the tree itself
10711 */
10712 GROW;
10713 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010714 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10715 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010716 } else {
10717 ctxt->instate = XML_PARSER_CONTENT;
10718 xmlParseElement(ctxt);
10719 ctxt->instate = XML_PARSER_EPILOG;
10720
10721
10722 /*
10723 * The Misc part at the end
10724 */
10725 xmlParseMisc(ctxt);
10726
Daniel Veillard561b7f82002-03-20 21:55:57 +000010727 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010728 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010729 }
10730 ctxt->instate = XML_PARSER_EOF;
10731 }
10732
10733 /*
10734 * SAX: end of the document processing.
10735 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010736 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010737 ctxt->sax->endDocument(ctxt->userData);
10738
Daniel Veillard5997aca2002-03-18 18:36:20 +000010739 /*
10740 * Remove locally kept entity definitions if the tree was not built
10741 */
10742 if ((ctxt->myDoc != NULL) &&
10743 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10744 xmlFreeDoc(ctxt->myDoc);
10745 ctxt->myDoc = NULL;
10746 }
10747
Daniel Veillardae0765b2008-07-31 19:54:59 +000010748 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10749 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10750 if (ctxt->valid)
10751 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10752 if (ctxt->nsWellFormed)
10753 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10754 if (ctxt->options & XML_PARSE_OLD10)
10755 ctxt->myDoc->properties |= XML_DOC_OLD10;
10756 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010757 if (! ctxt->wellFormed) {
10758 ctxt->valid = 0;
10759 return(-1);
10760 }
Owen Taylor3473f882001-02-23 17:55:21 +000010761 return(0);
10762}
10763
10764/**
10765 * xmlParseExtParsedEnt:
10766 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010767 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010768 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010769 * An external general parsed entity is well-formed if it matches the
10770 * production labeled extParsedEnt.
10771 *
10772 * [78] extParsedEnt ::= TextDecl? content
10773 *
10774 * Returns 0, -1 in case of error. the parser context is augmented
10775 * as a result of the parsing.
10776 */
10777
10778int
10779xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10780 xmlChar start[4];
10781 xmlCharEncoding enc;
10782
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010783 if ((ctxt == NULL) || (ctxt->input == NULL))
10784 return(-1);
10785
Owen Taylor3473f882001-02-23 17:55:21 +000010786 xmlDefaultSAXHandlerInit();
10787
Daniel Veillard309f81d2003-09-23 09:02:53 +000010788 xmlDetectSAX2(ctxt);
10789
Owen Taylor3473f882001-02-23 17:55:21 +000010790 GROW;
10791
10792 /*
10793 * SAX: beginning of the document processing.
10794 */
10795 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10796 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10797
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010798 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010799 * Get the 4 first bytes and decode the charset
10800 * if enc != XML_CHAR_ENCODING_NONE
10801 * plug some encoding conversion routines.
10802 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010803 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10804 start[0] = RAW;
10805 start[1] = NXT(1);
10806 start[2] = NXT(2);
10807 start[3] = NXT(3);
10808 enc = xmlDetectCharEncoding(start, 4);
10809 if (enc != XML_CHAR_ENCODING_NONE) {
10810 xmlSwitchEncoding(ctxt, enc);
10811 }
Owen Taylor3473f882001-02-23 17:55:21 +000010812 }
10813
10814
10815 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010816 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010817 }
10818
10819 /*
10820 * Check for the XMLDecl in the Prolog.
10821 */
10822 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010823 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010824
10825 /*
10826 * Note that we will switch encoding on the fly.
10827 */
10828 xmlParseXMLDecl(ctxt);
10829 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10830 /*
10831 * The XML REC instructs us to stop parsing right here
10832 */
10833 return(-1);
10834 }
10835 SKIP_BLANKS;
10836 } else {
10837 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10838 }
10839 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10840 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010841 if (ctxt->instate == XML_PARSER_EOF)
10842 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010843
10844 /*
10845 * Doing validity checking on chunk doesn't make sense
10846 */
10847 ctxt->instate = XML_PARSER_CONTENT;
10848 ctxt->validate = 0;
10849 ctxt->loadsubset = 0;
10850 ctxt->depth = 0;
10851
10852 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010853 if (ctxt->instate == XML_PARSER_EOF)
10854 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010855
Owen Taylor3473f882001-02-23 17:55:21 +000010856 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010857 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010858 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010859 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010860 }
10861
10862 /*
10863 * SAX: end of the document processing.
10864 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010865 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010866 ctxt->sax->endDocument(ctxt->userData);
10867
10868 if (! ctxt->wellFormed) return(-1);
10869 return(0);
10870}
10871
Daniel Veillard73b013f2003-09-30 12:36:01 +000010872#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010873/************************************************************************
10874 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010875 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010876 * *
10877 ************************************************************************/
10878
10879/**
10880 * xmlParseLookupSequence:
10881 * @ctxt: an XML parser context
10882 * @first: the first char to lookup
10883 * @next: the next char to lookup or zero
10884 * @third: the next char to lookup or zero
10885 *
10886 * Try to find if a sequence (first, next, third) or just (first next) or
10887 * (first) is available in the input stream.
10888 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10889 * to avoid rescanning sequences of bytes, it DOES change the state of the
10890 * parser, do not use liberally.
10891 *
10892 * Returns the index to the current parsing point if the full sequence
10893 * is available, -1 otherwise.
10894 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010895static int
Owen Taylor3473f882001-02-23 17:55:21 +000010896xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10897 xmlChar next, xmlChar third) {
10898 int base, len;
10899 xmlParserInputPtr in;
10900 const xmlChar *buf;
10901
10902 in = ctxt->input;
10903 if (in == NULL) return(-1);
10904 base = in->cur - in->base;
10905 if (base < 0) return(-1);
10906 if (ctxt->checkIndex > base)
10907 base = ctxt->checkIndex;
10908 if (in->buf == NULL) {
10909 buf = in->base;
10910 len = in->length;
10911 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010912 buf = xmlBufContent(in->buf->buffer);
10913 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010914 }
10915 /* take into account the sequence length */
10916 if (third) len -= 2;
10917 else if (next) len --;
10918 for (;base < len;base++) {
10919 if (buf[base] == first) {
10920 if (third != 0) {
10921 if ((buf[base + 1] != next) ||
10922 (buf[base + 2] != third)) continue;
10923 } else if (next != 0) {
10924 if (buf[base + 1] != next) continue;
10925 }
10926 ctxt->checkIndex = 0;
10927#ifdef DEBUG_PUSH
10928 if (next == 0)
10929 xmlGenericError(xmlGenericErrorContext,
10930 "PP: lookup '%c' found at %d\n",
10931 first, base);
10932 else if (third == 0)
10933 xmlGenericError(xmlGenericErrorContext,
10934 "PP: lookup '%c%c' found at %d\n",
10935 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010936 else
Owen Taylor3473f882001-02-23 17:55:21 +000010937 xmlGenericError(xmlGenericErrorContext,
10938 "PP: lookup '%c%c%c' found at %d\n",
10939 first, next, third, base);
10940#endif
10941 return(base - (in->cur - in->base));
10942 }
10943 }
10944 ctxt->checkIndex = base;
10945#ifdef DEBUG_PUSH
10946 if (next == 0)
10947 xmlGenericError(xmlGenericErrorContext,
10948 "PP: lookup '%c' failed\n", first);
10949 else if (third == 0)
10950 xmlGenericError(xmlGenericErrorContext,
10951 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010952 else
Owen Taylor3473f882001-02-23 17:55:21 +000010953 xmlGenericError(xmlGenericErrorContext,
10954 "PP: lookup '%c%c%c' failed\n", first, next, third);
10955#endif
10956 return(-1);
10957}
10958
10959/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010960 * xmlParseGetLasts:
10961 * @ctxt: an XML parser context
10962 * @lastlt: pointer to store the last '<' from the input
10963 * @lastgt: pointer to store the last '>' from the input
10964 *
10965 * Lookup the last < and > in the current chunk
10966 */
10967static void
10968xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10969 const xmlChar **lastgt) {
10970 const xmlChar *tmp;
10971
10972 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10973 xmlGenericError(xmlGenericErrorContext,
10974 "Internal error: xmlParseGetLasts\n");
10975 return;
10976 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010977 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010978 tmp = ctxt->input->end;
10979 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010980 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010981 if (tmp < ctxt->input->base) {
10982 *lastlt = NULL;
10983 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010984 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010985 *lastlt = tmp;
10986 tmp++;
10987 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10988 if (*tmp == '\'') {
10989 tmp++;
10990 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10991 if (tmp < ctxt->input->end) tmp++;
10992 } else if (*tmp == '"') {
10993 tmp++;
10994 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10995 if (tmp < ctxt->input->end) tmp++;
10996 } else
10997 tmp++;
10998 }
10999 if (tmp < ctxt->input->end)
11000 *lastgt = tmp;
11001 else {
11002 tmp = *lastlt;
11003 tmp--;
11004 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11005 if (tmp >= ctxt->input->base)
11006 *lastgt = tmp;
11007 else
11008 *lastgt = NULL;
11009 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011010 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011011 } else {
11012 *lastlt = NULL;
11013 *lastgt = NULL;
11014 }
11015}
11016/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011017 * xmlCheckCdataPush:
David Kilzer4f8606c2016-01-05 13:38:09 -080011018 * @cur: pointer to the block of characters
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011019 * @len: length of the block in bytes
David Kilzer4f8606c2016-01-05 13:38:09 -080011020 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011021 *
11022 * Check that the block of characters is okay as SCdata content [20]
11023 *
11024 * Returns the number of bytes to pass if okay, a negative index where an
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +020011025 * UTF-8 error occurred otherwise
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011026 */
11027static int
David Kilzer4f8606c2016-01-05 13:38:09 -080011028xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011029 int ix;
11030 unsigned char c;
11031 int codepoint;
11032
11033 if ((utf == NULL) || (len <= 0))
11034 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011035
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011036 for (ix = 0; ix < len;) { /* string is 0-terminated */
11037 c = utf[ix];
11038 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11039 if (c >= 0x20)
11040 ix++;
11041 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11042 ix++;
11043 else
11044 return(-ix);
11045 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011046 if (ix + 2 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011047 if ((utf[ix+1] & 0xc0 ) != 0x80)
11048 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011049 codepoint = (utf[ix] & 0x1f) << 6;
11050 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011051 if (!xmlIsCharQ(codepoint))
11052 return(-ix);
11053 ix += 2;
11054 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011055 if (ix + 3 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011056 if (((utf[ix+1] & 0xc0) != 0x80) ||
11057 ((utf[ix+2] & 0xc0) != 0x80))
11058 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011059 codepoint = (utf[ix] & 0xf) << 12;
11060 codepoint |= (utf[ix+1] & 0x3f) << 6;
11061 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011062 if (!xmlIsCharQ(codepoint))
11063 return(-ix);
11064 ix += 3;
11065 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011066 if (ix + 4 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011067 if (((utf[ix+1] & 0xc0) != 0x80) ||
11068 ((utf[ix+2] & 0xc0) != 0x80) ||
11069 ((utf[ix+3] & 0xc0) != 0x80))
11070 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011071 codepoint = (utf[ix] & 0x7) << 18;
11072 codepoint |= (utf[ix+1] & 0x3f) << 12;
11073 codepoint |= (utf[ix+2] & 0x3f) << 6;
11074 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011075 if (!xmlIsCharQ(codepoint))
11076 return(-ix);
11077 ix += 4;
11078 } else /* unknown encoding */
11079 return(-ix);
11080 }
11081 return(ix);
11082}
11083
11084/**
Owen Taylor3473f882001-02-23 17:55:21 +000011085 * xmlParseTryOrFinish:
11086 * @ctxt: an XML parser context
11087 * @terminate: last chunk indicator
11088 *
11089 * Try to progress on parsing
11090 *
11091 * Returns zero if no parsing was possible
11092 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011093static int
Owen Taylor3473f882001-02-23 17:55:21 +000011094xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11095 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011096 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011097 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011098 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011099
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011100 if (ctxt->input == NULL)
11101 return(0);
11102
Owen Taylor3473f882001-02-23 17:55:21 +000011103#ifdef DEBUG_PUSH
11104 switch (ctxt->instate) {
11105 case XML_PARSER_EOF:
11106 xmlGenericError(xmlGenericErrorContext,
11107 "PP: try EOF\n"); break;
11108 case XML_PARSER_START:
11109 xmlGenericError(xmlGenericErrorContext,
11110 "PP: try START\n"); break;
11111 case XML_PARSER_MISC:
11112 xmlGenericError(xmlGenericErrorContext,
11113 "PP: try MISC\n");break;
11114 case XML_PARSER_COMMENT:
11115 xmlGenericError(xmlGenericErrorContext,
11116 "PP: try COMMENT\n");break;
11117 case XML_PARSER_PROLOG:
11118 xmlGenericError(xmlGenericErrorContext,
11119 "PP: try PROLOG\n");break;
11120 case XML_PARSER_START_TAG:
11121 xmlGenericError(xmlGenericErrorContext,
11122 "PP: try START_TAG\n");break;
11123 case XML_PARSER_CONTENT:
11124 xmlGenericError(xmlGenericErrorContext,
11125 "PP: try CONTENT\n");break;
11126 case XML_PARSER_CDATA_SECTION:
11127 xmlGenericError(xmlGenericErrorContext,
11128 "PP: try CDATA_SECTION\n");break;
11129 case XML_PARSER_END_TAG:
11130 xmlGenericError(xmlGenericErrorContext,
11131 "PP: try END_TAG\n");break;
11132 case XML_PARSER_ENTITY_DECL:
11133 xmlGenericError(xmlGenericErrorContext,
11134 "PP: try ENTITY_DECL\n");break;
11135 case XML_PARSER_ENTITY_VALUE:
11136 xmlGenericError(xmlGenericErrorContext,
11137 "PP: try ENTITY_VALUE\n");break;
11138 case XML_PARSER_ATTRIBUTE_VALUE:
11139 xmlGenericError(xmlGenericErrorContext,
11140 "PP: try ATTRIBUTE_VALUE\n");break;
11141 case XML_PARSER_DTD:
11142 xmlGenericError(xmlGenericErrorContext,
11143 "PP: try DTD\n");break;
11144 case XML_PARSER_EPILOG:
11145 xmlGenericError(xmlGenericErrorContext,
11146 "PP: try EPILOG\n");break;
11147 case XML_PARSER_PI:
11148 xmlGenericError(xmlGenericErrorContext,
11149 "PP: try PI\n");break;
11150 case XML_PARSER_IGNORE:
11151 xmlGenericError(xmlGenericErrorContext,
11152 "PP: try IGNORE\n");break;
11153 }
11154#endif
11155
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011156 if ((ctxt->input != NULL) &&
11157 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011158 xmlSHRINK(ctxt);
11159 ctxt->checkIndex = 0;
11160 }
11161 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011162
Daniel Veillarde50ba812013-04-11 15:54:51 +080011163 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011164 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011165 return(0);
11166
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011167 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011168 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011169 avail = ctxt->input->length -
11170 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011171 else {
11172 /*
11173 * If we are operating on converted input, try to flush
11174 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011175 * buffer. But do not do this in document start where
11176 * encoding="..." may not have been read and we work on a
11177 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011178 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011179 if ((ctxt->instate != XML_PARSER_START) &&
11180 (ctxt->input->buf->raw != NULL) &&
11181 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011182 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11183 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011184 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011185
11186 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011187 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11188 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011189 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011190 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011191 (ctxt->input->cur - ctxt->input->base);
11192 }
Owen Taylor3473f882001-02-23 17:55:21 +000011193 if (avail < 1)
11194 goto done;
11195 switch (ctxt->instate) {
11196 case XML_PARSER_EOF:
11197 /*
11198 * Document parsing is done !
11199 */
11200 goto done;
11201 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011202 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11203 xmlChar start[4];
11204 xmlCharEncoding enc;
11205
11206 /*
11207 * Very first chars read from the document flow.
11208 */
11209 if (avail < 4)
11210 goto done;
11211
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011212 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011213 * Get the 4 first bytes and decode the charset
11214 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011215 * plug some encoding conversion routines,
11216 * else xmlSwitchEncoding will set to (default)
11217 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011218 */
11219 start[0] = RAW;
11220 start[1] = NXT(1);
11221 start[2] = NXT(2);
11222 start[3] = NXT(3);
11223 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011224 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011225 break;
11226 }
Owen Taylor3473f882001-02-23 17:55:21 +000011227
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011228 if (avail < 2)
11229 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011230 cur = ctxt->input->cur[0];
11231 next = ctxt->input->cur[1];
11232 if (cur == 0) {
11233 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11234 ctxt->sax->setDocumentLocator(ctxt->userData,
11235 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011236 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011237 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011238#ifdef DEBUG_PUSH
11239 xmlGenericError(xmlGenericErrorContext,
11240 "PP: entering EOF\n");
11241#endif
11242 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11243 ctxt->sax->endDocument(ctxt->userData);
11244 goto done;
11245 }
11246 if ((cur == '<') && (next == '?')) {
11247 /* PI or XML decl */
11248 if (avail < 5) return(ret);
11249 if ((!terminate) &&
11250 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11251 return(ret);
11252 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11253 ctxt->sax->setDocumentLocator(ctxt->userData,
11254 &xmlDefaultSAXLocator);
11255 if ((ctxt->input->cur[2] == 'x') &&
11256 (ctxt->input->cur[3] == 'm') &&
11257 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011258 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011259 ret += 5;
11260#ifdef DEBUG_PUSH
11261 xmlGenericError(xmlGenericErrorContext,
11262 "PP: Parsing XML Decl\n");
11263#endif
11264 xmlParseXMLDecl(ctxt);
11265 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11266 /*
11267 * The XML REC instructs us to stop parsing right
11268 * here
11269 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011270 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011271 return(0);
11272 }
11273 ctxt->standalone = ctxt->input->standalone;
11274 if ((ctxt->encoding == NULL) &&
11275 (ctxt->input->encoding != NULL))
11276 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11277 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11278 (!ctxt->disableSAX))
11279 ctxt->sax->startDocument(ctxt->userData);
11280 ctxt->instate = XML_PARSER_MISC;
11281#ifdef DEBUG_PUSH
11282 xmlGenericError(xmlGenericErrorContext,
11283 "PP: entering MISC\n");
11284#endif
11285 } else {
11286 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11287 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11288 (!ctxt->disableSAX))
11289 ctxt->sax->startDocument(ctxt->userData);
11290 ctxt->instate = XML_PARSER_MISC;
11291#ifdef DEBUG_PUSH
11292 xmlGenericError(xmlGenericErrorContext,
11293 "PP: entering MISC\n");
11294#endif
11295 }
11296 } else {
11297 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11298 ctxt->sax->setDocumentLocator(ctxt->userData,
11299 &xmlDefaultSAXLocator);
11300 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011301 if (ctxt->version == NULL) {
11302 xmlErrMemory(ctxt, NULL);
11303 break;
11304 }
Owen Taylor3473f882001-02-23 17:55:21 +000011305 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11306 (!ctxt->disableSAX))
11307 ctxt->sax->startDocument(ctxt->userData);
11308 ctxt->instate = XML_PARSER_MISC;
11309#ifdef DEBUG_PUSH
11310 xmlGenericError(xmlGenericErrorContext,
11311 "PP: entering MISC\n");
11312#endif
11313 }
11314 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011315 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011316 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011317 const xmlChar *prefix = NULL;
11318 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011319 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011320
11321 if ((avail < 2) && (ctxt->inputNr == 1))
11322 goto done;
11323 cur = ctxt->input->cur[0];
11324 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011325 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011326 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011327 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11328 ctxt->sax->endDocument(ctxt->userData);
11329 goto done;
11330 }
11331 if (!terminate) {
11332 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011333 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011334 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011335 goto done;
11336 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11337 goto done;
11338 }
11339 }
11340 if (ctxt->spaceNr == 0)
11341 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011342 else if (*ctxt->space == -2)
11343 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011344 else
11345 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011346#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011347 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011348#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011349 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011350#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011351 else
11352 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011353#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011354 if (ctxt->instate == XML_PARSER_EOF)
11355 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011356 if (name == NULL) {
11357 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011358 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011359 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11360 ctxt->sax->endDocument(ctxt->userData);
11361 goto done;
11362 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011363#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011364 /*
11365 * [ VC: Root Element Type ]
11366 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011367 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011368 */
11369 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11370 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11371 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011372#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011373
11374 /*
11375 * Check for an Empty Element.
11376 */
11377 if ((RAW == '/') && (NXT(1) == '>')) {
11378 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011379
11380 if (ctxt->sax2) {
11381 if ((ctxt->sax != NULL) &&
11382 (ctxt->sax->endElementNs != NULL) &&
11383 (!ctxt->disableSAX))
11384 ctxt->sax->endElementNs(ctxt->userData, name,
11385 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011386 if (ctxt->nsNr - nsNr > 0)
11387 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011388#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011389 } else {
11390 if ((ctxt->sax != NULL) &&
11391 (ctxt->sax->endElement != NULL) &&
11392 (!ctxt->disableSAX))
11393 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011394#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011395 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011396 if (ctxt->instate == XML_PARSER_EOF)
11397 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011398 spacePop(ctxt);
11399 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011400 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011401 } else {
11402 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011403 }
Daniel Veillard65686452012-07-19 18:25:01 +080011404 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011405 break;
11406 }
11407 if (RAW == '>') {
11408 NEXT;
11409 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011410 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011411 "Couldn't find end of Start Tag %s\n",
11412 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011413 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011414 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011415 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011416 if (ctxt->sax2)
11417 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011418#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011419 else
11420 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011421#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011422
Daniel Veillarda880b122003-04-21 21:36:41 +000011423 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011424 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011425 break;
11426 }
11427 case XML_PARSER_CONTENT: {
11428 const xmlChar *test;
11429 unsigned int cons;
11430 if ((avail < 2) && (ctxt->inputNr == 1))
11431 goto done;
11432 cur = ctxt->input->cur[0];
11433 next = ctxt->input->cur[1];
11434
11435 test = CUR_PTR;
11436 cons = ctxt->input->consumed;
11437 if ((cur == '<') && (next == '/')) {
11438 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011439 break;
11440 } else if ((cur == '<') && (next == '?')) {
11441 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011442 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11443 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011444 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011445 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011446 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011447 ctxt->instate = XML_PARSER_CONTENT;
11448 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011449 } else if ((cur == '<') && (next != '!')) {
11450 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011451 break;
11452 } else if ((cur == '<') && (next == '!') &&
11453 (ctxt->input->cur[2] == '-') &&
11454 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011455 int term;
11456
11457 if (avail < 4)
11458 goto done;
11459 ctxt->input->cur += 4;
11460 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11461 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011462 if ((!terminate) && (term < 0)) {
11463 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011464 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011465 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011466 xmlParseComment(ctxt);
11467 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011468 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011469 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11470 (ctxt->input->cur[2] == '[') &&
11471 (ctxt->input->cur[3] == 'C') &&
11472 (ctxt->input->cur[4] == 'D') &&
11473 (ctxt->input->cur[5] == 'A') &&
11474 (ctxt->input->cur[6] == 'T') &&
11475 (ctxt->input->cur[7] == 'A') &&
11476 (ctxt->input->cur[8] == '[')) {
11477 SKIP(9);
11478 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011479 break;
11480 } else if ((cur == '<') && (next == '!') &&
11481 (avail < 9)) {
11482 goto done;
11483 } else if (cur == '&') {
11484 if ((!terminate) &&
11485 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11486 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011487 xmlParseReference(ctxt);
11488 } else {
11489 /* TODO Avoid the extra copy, handle directly !!! */
11490 /*
11491 * Goal of the following test is:
11492 * - minimize calls to the SAX 'character' callback
11493 * when they are mergeable
11494 * - handle a problem for isBlank when we only parse
11495 * a sequence of blank chars and the next one is
11496 * not available to check against '<' presence.
11497 * - tries to homogenize the differences in SAX
11498 * callbacks between the push and pull versions
11499 * of the parser.
11500 */
11501 if ((ctxt->inputNr == 1) &&
11502 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11503 if (!terminate) {
11504 if (ctxt->progressive) {
11505 if ((lastlt == NULL) ||
11506 (ctxt->input->cur > lastlt))
11507 goto done;
11508 } else if (xmlParseLookupSequence(ctxt,
11509 '<', 0, 0) < 0) {
11510 goto done;
11511 }
11512 }
11513 }
11514 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011515 xmlParseCharData(ctxt, 0);
11516 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011517 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011518 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11519 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011520 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011521 break;
11522 }
11523 break;
11524 }
11525 case XML_PARSER_END_TAG:
11526 if (avail < 2)
11527 goto done;
11528 if (!terminate) {
11529 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011530 /* > can be found unescaped in attribute values */
11531 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011532 goto done;
11533 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11534 goto done;
11535 }
11536 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011537 if (ctxt->sax2) {
11538 xmlParseEndTag2(ctxt,
11539 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11540 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011541 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011542 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011543 }
11544#ifdef LIBXML_SAX1_ENABLED
11545 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011546 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011547#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011548 if (ctxt->instate == XML_PARSER_EOF) {
11549 /* Nothing */
11550 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011551 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011552 } else {
11553 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011554 }
11555 break;
11556 case XML_PARSER_CDATA_SECTION: {
11557 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011558 * The Push mode needs to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011559 * cdataBlock merge back contiguous callbacks.
11560 */
11561 int base;
11562
11563 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11564 if (base < 0) {
11565 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011566 int tmp;
11567
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011568 tmp = xmlCheckCdataPush(ctxt->input->cur,
David Kilzer4f8606c2016-01-05 13:38:09 -080011569 XML_PARSER_BIG_BUFFER_SIZE, 0);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011570 if (tmp < 0) {
11571 tmp = -tmp;
11572 ctxt->input->cur += tmp;
11573 goto encoding_error;
11574 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011575 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11576 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011577 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011578 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011579 else if (ctxt->sax->characters != NULL)
11580 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011581 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011582 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011583 if (ctxt->instate == XML_PARSER_EOF)
11584 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011585 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011586 ctxt->checkIndex = 0;
11587 }
11588 goto done;
11589 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011590 int tmp;
11591
David Kilzer4f8606c2016-01-05 13:38:09 -080011592 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011593 if ((tmp < 0) || (tmp != base)) {
11594 tmp = -tmp;
11595 ctxt->input->cur += tmp;
11596 goto encoding_error;
11597 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011598 if ((ctxt->sax != NULL) && (base == 0) &&
11599 (ctxt->sax->cdataBlock != NULL) &&
11600 (!ctxt->disableSAX)) {
11601 /*
11602 * Special case to provide identical behaviour
11603 * between pull and push parsers on empty CDATA
11604 * sections
11605 */
11606 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11607 (!strncmp((const char *)&ctxt->input->cur[-9],
11608 "<![CDATA[", 9)))
11609 ctxt->sax->cdataBlock(ctxt->userData,
11610 BAD_CAST "", 0);
11611 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011612 (!ctxt->disableSAX)) {
11613 if (ctxt->sax->cdataBlock != NULL)
11614 ctxt->sax->cdataBlock(ctxt->userData,
11615 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011616 else if (ctxt->sax->characters != NULL)
11617 ctxt->sax->characters(ctxt->userData,
11618 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011619 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011620 if (ctxt->instate == XML_PARSER_EOF)
11621 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011622 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011623 ctxt->checkIndex = 0;
11624 ctxt->instate = XML_PARSER_CONTENT;
11625#ifdef DEBUG_PUSH
11626 xmlGenericError(xmlGenericErrorContext,
11627 "PP: entering CONTENT\n");
11628#endif
11629 }
11630 break;
11631 }
Owen Taylor3473f882001-02-23 17:55:21 +000011632 case XML_PARSER_MISC:
11633 SKIP_BLANKS;
11634 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011635 avail = ctxt->input->length -
11636 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011637 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011638 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011639 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011640 if (avail < 2)
11641 goto done;
11642 cur = ctxt->input->cur[0];
11643 next = ctxt->input->cur[1];
11644 if ((cur == '<') && (next == '?')) {
11645 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011646 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11647 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011648 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011649 }
Owen Taylor3473f882001-02-23 17:55:21 +000011650#ifdef DEBUG_PUSH
11651 xmlGenericError(xmlGenericErrorContext,
11652 "PP: Parsing PI\n");
11653#endif
11654 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011655 if (ctxt->instate == XML_PARSER_EOF)
11656 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011657 ctxt->instate = XML_PARSER_MISC;
11658 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011659 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011660 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011661 (ctxt->input->cur[2] == '-') &&
11662 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011663 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011664 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11665 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011666 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011667 }
Owen Taylor3473f882001-02-23 17:55:21 +000011668#ifdef DEBUG_PUSH
11669 xmlGenericError(xmlGenericErrorContext,
11670 "PP: Parsing Comment\n");
11671#endif
11672 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011673 if (ctxt->instate == XML_PARSER_EOF)
11674 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011675 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011676 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011677 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011678 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011679 (ctxt->input->cur[2] == 'D') &&
11680 (ctxt->input->cur[3] == 'O') &&
11681 (ctxt->input->cur[4] == 'C') &&
11682 (ctxt->input->cur[5] == 'T') &&
11683 (ctxt->input->cur[6] == 'Y') &&
11684 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011685 (ctxt->input->cur[8] == 'E')) {
11686 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011687 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11688 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011689 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011690 }
Owen Taylor3473f882001-02-23 17:55:21 +000011691#ifdef DEBUG_PUSH
11692 xmlGenericError(xmlGenericErrorContext,
11693 "PP: Parsing internal subset\n");
11694#endif
11695 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011696 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011697 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011698 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011699 if (ctxt->instate == XML_PARSER_EOF)
11700 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011701 if (RAW == '[') {
11702 ctxt->instate = XML_PARSER_DTD;
11703#ifdef DEBUG_PUSH
11704 xmlGenericError(xmlGenericErrorContext,
11705 "PP: entering DTD\n");
11706#endif
11707 } else {
11708 /*
11709 * Create and update the external subset.
11710 */
11711 ctxt->inSubset = 2;
11712 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11713 (ctxt->sax->externalSubset != NULL))
11714 ctxt->sax->externalSubset(ctxt->userData,
11715 ctxt->intSubName, ctxt->extSubSystem,
11716 ctxt->extSubURI);
11717 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011718 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011719 ctxt->instate = XML_PARSER_PROLOG;
11720#ifdef DEBUG_PUSH
11721 xmlGenericError(xmlGenericErrorContext,
11722 "PP: entering PROLOG\n");
11723#endif
11724 }
11725 } else if ((cur == '<') && (next == '!') &&
11726 (avail < 9)) {
11727 goto done;
11728 } else {
11729 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011730 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011731 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011732#ifdef DEBUG_PUSH
11733 xmlGenericError(xmlGenericErrorContext,
11734 "PP: entering START_TAG\n");
11735#endif
11736 }
11737 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011738 case XML_PARSER_PROLOG:
11739 SKIP_BLANKS;
11740 if (ctxt->input->buf == NULL)
11741 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11742 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011743 avail = xmlBufUse(ctxt->input->buf->buffer) -
11744 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011745 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011746 goto done;
11747 cur = ctxt->input->cur[0];
11748 next = ctxt->input->cur[1];
11749 if ((cur == '<') && (next == '?')) {
11750 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011751 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11752 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011753 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011754 }
Owen Taylor3473f882001-02-23 17:55:21 +000011755#ifdef DEBUG_PUSH
11756 xmlGenericError(xmlGenericErrorContext,
11757 "PP: Parsing PI\n");
11758#endif
11759 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011760 if (ctxt->instate == XML_PARSER_EOF)
11761 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011762 ctxt->instate = XML_PARSER_PROLOG;
11763 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011764 } else if ((cur == '<') && (next == '!') &&
11765 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11766 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011767 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11768 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011769 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011770 }
Owen Taylor3473f882001-02-23 17:55:21 +000011771#ifdef DEBUG_PUSH
11772 xmlGenericError(xmlGenericErrorContext,
11773 "PP: Parsing Comment\n");
11774#endif
11775 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011776 if (ctxt->instate == XML_PARSER_EOF)
11777 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011778 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011779 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011780 } else if ((cur == '<') && (next == '!') &&
11781 (avail < 4)) {
11782 goto done;
11783 } else {
11784 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011785 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011786 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011787 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011788#ifdef DEBUG_PUSH
11789 xmlGenericError(xmlGenericErrorContext,
11790 "PP: entering START_TAG\n");
11791#endif
11792 }
11793 break;
11794 case XML_PARSER_EPILOG:
11795 SKIP_BLANKS;
11796 if (ctxt->input->buf == NULL)
11797 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11798 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011799 avail = xmlBufUse(ctxt->input->buf->buffer) -
11800 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011801 if (avail < 2)
11802 goto done;
11803 cur = ctxt->input->cur[0];
11804 next = ctxt->input->cur[1];
11805 if ((cur == '<') && (next == '?')) {
11806 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011807 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11808 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011809 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011810 }
Owen Taylor3473f882001-02-23 17:55:21 +000011811#ifdef DEBUG_PUSH
11812 xmlGenericError(xmlGenericErrorContext,
11813 "PP: Parsing PI\n");
11814#endif
11815 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011816 if (ctxt->instate == XML_PARSER_EOF)
11817 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011818 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011819 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011820 } else if ((cur == '<') && (next == '!') &&
11821 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11822 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011823 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11824 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011825 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011826 }
Owen Taylor3473f882001-02-23 17:55:21 +000011827#ifdef DEBUG_PUSH
11828 xmlGenericError(xmlGenericErrorContext,
11829 "PP: Parsing Comment\n");
11830#endif
11831 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011832 if (ctxt->instate == XML_PARSER_EOF)
11833 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011834 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011835 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011836 } else if ((cur == '<') && (next == '!') &&
11837 (avail < 4)) {
11838 goto done;
11839 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011840 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011841 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011842#ifdef DEBUG_PUSH
11843 xmlGenericError(xmlGenericErrorContext,
11844 "PP: entering EOF\n");
11845#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011846 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011847 ctxt->sax->endDocument(ctxt->userData);
11848 goto done;
11849 }
11850 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011851 case XML_PARSER_DTD: {
11852 /*
11853 * Sorry but progressive parsing of the internal subset
11854 * is not expected to be supported. We first check that
11855 * the full content of the internal subset is available and
11856 * the parsing is launched only at that point.
11857 * Internal subset ends up with "']' S? '>'" in an unescaped
11858 * section and not in a ']]>' sequence which are conditional
11859 * sections (whoever argued to keep that crap in XML deserve
11860 * a place in hell !).
11861 */
11862 int base, i;
11863 xmlChar *buf;
11864 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011865 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011866
11867 base = ctxt->input->cur - ctxt->input->base;
11868 if (base < 0) return(0);
11869 if (ctxt->checkIndex > base)
11870 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011871 buf = xmlBufContent(ctxt->input->buf->buffer);
11872 use = xmlBufUse(ctxt->input->buf->buffer);
11873 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011874 if (quote != 0) {
11875 if (buf[base] == quote)
11876 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011877 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011878 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011879 if ((quote == 0) && (buf[base] == '<')) {
11880 int found = 0;
11881 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011882 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011883 (buf[base + 1] == '!') &&
11884 (buf[base + 2] == '-') &&
11885 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011886 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011887 if ((buf[base] == '-') &&
11888 (buf[base + 1] == '-') &&
11889 (buf[base + 2] == '>')) {
11890 found = 1;
11891 base += 2;
11892 break;
11893 }
11894 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011895 if (!found) {
11896#if 0
11897 fprintf(stderr, "unfinished comment\n");
11898#endif
11899 break; /* for */
11900 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011901 continue;
11902 }
11903 }
Owen Taylor3473f882001-02-23 17:55:21 +000011904 if (buf[base] == '"') {
11905 quote = '"';
11906 continue;
11907 }
11908 if (buf[base] == '\'') {
11909 quote = '\'';
11910 continue;
11911 }
11912 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011913#if 0
11914 fprintf(stderr, "%c%c%c%c: ", buf[base],
11915 buf[base + 1], buf[base + 2], buf[base + 3]);
11916#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011917 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011918 break;
11919 if (buf[base + 1] == ']') {
11920 /* conditional crap, skip both ']' ! */
11921 base++;
11922 continue;
11923 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011924 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011925 if (buf[base + i] == '>') {
11926#if 0
11927 fprintf(stderr, "found\n");
11928#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011929 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011930 }
11931 if (!IS_BLANK_CH(buf[base + i])) {
11932#if 0
11933 fprintf(stderr, "not found\n");
11934#endif
11935 goto not_end_of_int_subset;
11936 }
Owen Taylor3473f882001-02-23 17:55:21 +000011937 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011938#if 0
11939 fprintf(stderr, "end of stream\n");
11940#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011941 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011942
Owen Taylor3473f882001-02-23 17:55:21 +000011943 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011944not_end_of_int_subset:
11945 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011946 }
11947 /*
11948 * We didn't found the end of the Internal subset
11949 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011950 if (quote == 0)
11951 ctxt->checkIndex = base;
11952 else
11953 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011954#ifdef DEBUG_PUSH
11955 if (next == 0)
11956 xmlGenericError(xmlGenericErrorContext,
11957 "PP: lookup of int subset end filed\n");
11958#endif
11959 goto done;
11960
11961found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011962 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011963 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011964 if (ctxt->instate == XML_PARSER_EOF)
11965 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011966 ctxt->inSubset = 2;
11967 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11968 (ctxt->sax->externalSubset != NULL))
11969 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11970 ctxt->extSubSystem, ctxt->extSubURI);
11971 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011972 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011973 if (ctxt->instate == XML_PARSER_EOF)
11974 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011975 ctxt->instate = XML_PARSER_PROLOG;
11976 ctxt->checkIndex = 0;
11977#ifdef DEBUG_PUSH
11978 xmlGenericError(xmlGenericErrorContext,
11979 "PP: entering PROLOG\n");
11980#endif
11981 break;
11982 }
11983 case XML_PARSER_COMMENT:
11984 xmlGenericError(xmlGenericErrorContext,
11985 "PP: internal error, state == COMMENT\n");
11986 ctxt->instate = XML_PARSER_CONTENT;
11987#ifdef DEBUG_PUSH
11988 xmlGenericError(xmlGenericErrorContext,
11989 "PP: entering CONTENT\n");
11990#endif
11991 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011992 case XML_PARSER_IGNORE:
11993 xmlGenericError(xmlGenericErrorContext,
11994 "PP: internal error, state == IGNORE");
11995 ctxt->instate = XML_PARSER_DTD;
11996#ifdef DEBUG_PUSH
11997 xmlGenericError(xmlGenericErrorContext,
11998 "PP: entering DTD\n");
11999#endif
12000 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012001 case XML_PARSER_PI:
12002 xmlGenericError(xmlGenericErrorContext,
12003 "PP: internal error, state == PI\n");
12004 ctxt->instate = XML_PARSER_CONTENT;
12005#ifdef DEBUG_PUSH
12006 xmlGenericError(xmlGenericErrorContext,
12007 "PP: entering CONTENT\n");
12008#endif
12009 break;
12010 case XML_PARSER_ENTITY_DECL:
12011 xmlGenericError(xmlGenericErrorContext,
12012 "PP: internal error, state == ENTITY_DECL\n");
12013 ctxt->instate = XML_PARSER_DTD;
12014#ifdef DEBUG_PUSH
12015 xmlGenericError(xmlGenericErrorContext,
12016 "PP: entering DTD\n");
12017#endif
12018 break;
12019 case XML_PARSER_ENTITY_VALUE:
12020 xmlGenericError(xmlGenericErrorContext,
12021 "PP: internal error, state == ENTITY_VALUE\n");
12022 ctxt->instate = XML_PARSER_CONTENT;
12023#ifdef DEBUG_PUSH
12024 xmlGenericError(xmlGenericErrorContext,
12025 "PP: entering DTD\n");
12026#endif
12027 break;
12028 case XML_PARSER_ATTRIBUTE_VALUE:
12029 xmlGenericError(xmlGenericErrorContext,
12030 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12031 ctxt->instate = XML_PARSER_START_TAG;
12032#ifdef DEBUG_PUSH
12033 xmlGenericError(xmlGenericErrorContext,
12034 "PP: entering START_TAG\n");
12035#endif
12036 break;
12037 case XML_PARSER_SYSTEM_LITERAL:
12038 xmlGenericError(xmlGenericErrorContext,
12039 "PP: internal error, state == SYSTEM_LITERAL\n");
12040 ctxt->instate = XML_PARSER_START_TAG;
12041#ifdef DEBUG_PUSH
12042 xmlGenericError(xmlGenericErrorContext,
12043 "PP: entering START_TAG\n");
12044#endif
12045 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012046 case XML_PARSER_PUBLIC_LITERAL:
12047 xmlGenericError(xmlGenericErrorContext,
12048 "PP: internal error, state == PUBLIC_LITERAL\n");
12049 ctxt->instate = XML_PARSER_START_TAG;
12050#ifdef DEBUG_PUSH
12051 xmlGenericError(xmlGenericErrorContext,
12052 "PP: entering START_TAG\n");
12053#endif
12054 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012055 }
12056 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012057done:
Owen Taylor3473f882001-02-23 17:55:21 +000012058#ifdef DEBUG_PUSH
12059 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12060#endif
12061 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012062encoding_error:
12063 {
12064 char buffer[150];
12065
12066 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12067 ctxt->input->cur[0], ctxt->input->cur[1],
12068 ctxt->input->cur[2], ctxt->input->cur[3]);
12069 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12070 "Input is not proper UTF-8, indicate encoding !\n%s",
12071 BAD_CAST buffer, NULL);
12072 }
12073 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012074}
12075
12076/**
Daniel Veillard65686452012-07-19 18:25:01 +080012077 * xmlParseCheckTransition:
12078 * @ctxt: an XML parser context
12079 * @chunk: a char array
12080 * @size: the size in byte of the chunk
12081 *
12082 * Check depending on the current parser state if the chunk given must be
12083 * processed immediately or one need more data to advance on parsing.
12084 *
12085 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12086 */
12087static int
12088xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12089 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12090 return(-1);
12091 if (ctxt->instate == XML_PARSER_START_TAG) {
12092 if (memchr(chunk, '>', size) != NULL)
12093 return(1);
12094 return(0);
12095 }
12096 if (ctxt->progressive == XML_PARSER_COMMENT) {
12097 if (memchr(chunk, '>', size) != NULL)
12098 return(1);
12099 return(0);
12100 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012101 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12102 if (memchr(chunk, '>', size) != NULL)
12103 return(1);
12104 return(0);
12105 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012106 if (ctxt->progressive == XML_PARSER_PI) {
12107 if (memchr(chunk, '>', size) != NULL)
12108 return(1);
12109 return(0);
12110 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012111 if (ctxt->instate == XML_PARSER_END_TAG) {
12112 if (memchr(chunk, '>', size) != NULL)
12113 return(1);
12114 return(0);
12115 }
12116 if ((ctxt->progressive == XML_PARSER_DTD) ||
12117 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012118 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012119 return(1);
12120 return(0);
12121 }
Daniel Veillard65686452012-07-19 18:25:01 +080012122 return(1);
12123}
12124
12125/**
Owen Taylor3473f882001-02-23 17:55:21 +000012126 * xmlParseChunk:
12127 * @ctxt: an XML parser context
12128 * @chunk: an char array
12129 * @size: the size in byte of the chunk
12130 * @terminate: last chunk indicator
12131 *
12132 * Parse a Chunk of memory
12133 *
12134 * Returns zero if no error, the xmlParserErrors otherwise.
12135 */
12136int
12137xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12138 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012139 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012140 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012141 size_t old_avail = 0;
12142 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012143
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012144 if (ctxt == NULL)
12145 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012146 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012147 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012148 if (ctxt->instate == XML_PARSER_EOF)
12149 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012150 if (ctxt->instate == XML_PARSER_START)
12151 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012152 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12153 (chunk[size - 1] == '\r')) {
12154 end_in_lf = 1;
12155 size--;
12156 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012157
12158xmldecl_done:
12159
Owen Taylor3473f882001-02-23 17:55:21 +000012160 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12161 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012162 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12163 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012164 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012165
Daniel Veillard65686452012-07-19 18:25:01 +080012166 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012167 /*
12168 * Specific handling if we autodetected an encoding, we should not
12169 * push more than the first line ... which depend on the encoding
12170 * And only push the rest once the final encoding was detected
12171 */
12172 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12173 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012174 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012175
12176 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177 BAD_CAST "UTF-16")) ||
12178 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12179 BAD_CAST "UTF16")))
12180 len = 90;
12181 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182 BAD_CAST "UCS-4")) ||
12183 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12184 BAD_CAST "UCS4")))
12185 len = 180;
12186
12187 if (ctxt->input->buf->rawconsumed < len)
12188 len -= ctxt->input->buf->rawconsumed;
12189
Raul Hudeaba9716a2010-03-15 10:13:29 +010012190 /*
12191 * Change size for reading the initial declaration only
12192 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12193 * will blindly copy extra bytes from memory.
12194 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012195 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012196 remain = size - len;
12197 size = len;
12198 } else {
12199 remain = 0;
12200 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012201 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012202 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012203 if (res < 0) {
12204 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012205 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012206 return (XML_PARSER_EOF);
12207 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012208 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012209#ifdef DEBUG_PUSH
12210 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12211#endif
12212
Owen Taylor3473f882001-02-23 17:55:21 +000012213 } else if (ctxt->instate != XML_PARSER_EOF) {
12214 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12215 xmlParserInputBufferPtr in = ctxt->input->buf;
12216 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12217 (in->raw != NULL)) {
12218 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012219 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12220 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012221
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012222 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012223 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012224 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012225 xmlGenericError(xmlGenericErrorContext,
12226 "xmlParseChunk: encoder error\n");
12227 return(XML_ERR_INVALID_ENCODING);
12228 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012229 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012230 }
12231 }
12232 }
Daniel Veillard65686452012-07-19 18:25:01 +080012233 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012234 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012235 } else {
12236 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12237 avail = xmlBufUse(ctxt->input->buf->buffer);
12238 /*
12239 * Depending on the current state it may not be such
12240 * a good idea to try parsing if there is nothing in the chunk
12241 * which would be worth doing a parser state transition and we
12242 * need to wait for more data
12243 */
12244 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12245 (old_avail == 0) || (avail == 0) ||
12246 (xmlParseCheckTransition(ctxt,
12247 (const char *)&ctxt->input->base[old_avail],
12248 avail - old_avail)))
12249 xmlParseTryOrFinish(ctxt, terminate);
12250 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012251 if (ctxt->instate == XML_PARSER_EOF)
12252 return(ctxt->errNo);
12253
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012254 if ((ctxt->input != NULL) &&
12255 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12256 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12257 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12258 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012259 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012260 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012261 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12262 return(ctxt->errNo);
12263
12264 if (remain != 0) {
12265 chunk += size;
12266 size = remain;
12267 remain = 0;
12268 goto xmldecl_done;
12269 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012270 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12271 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012272 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12273 ctxt->input);
12274 size_t current = ctxt->input->cur - ctxt->input->base;
12275
Daniel Veillarda617e242006-01-09 14:38:44 +000012276 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012277
12278 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12279 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012280 }
Owen Taylor3473f882001-02-23 17:55:21 +000012281 if (terminate) {
12282 /*
12283 * Check for termination
12284 */
Daniel Veillard65686452012-07-19 18:25:01 +080012285 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012286
12287 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012288 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012289 cur_avail = ctxt->input->length -
12290 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012291 else
Daniel Veillard65686452012-07-19 18:25:01 +080012292 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12293 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012294 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012295
Owen Taylor3473f882001-02-23 17:55:21 +000012296 if ((ctxt->instate != XML_PARSER_EOF) &&
12297 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012298 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012299 }
Daniel Veillard65686452012-07-19 18:25:01 +080012300 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012301 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012302 }
Owen Taylor3473f882001-02-23 17:55:21 +000012303 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012304 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012305 ctxt->sax->endDocument(ctxt->userData);
12306 }
12307 ctxt->instate = XML_PARSER_EOF;
12308 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012309 if (ctxt->wellFormed == 0)
12310 return((xmlParserErrors) ctxt->errNo);
12311 else
12312 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012313}
12314
12315/************************************************************************
12316 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012317 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012318 * *
12319 ************************************************************************/
12320
12321/**
Owen Taylor3473f882001-02-23 17:55:21 +000012322 * xmlCreatePushParserCtxt:
12323 * @sax: a SAX handler
12324 * @user_data: The user data returned on SAX callbacks
12325 * @chunk: a pointer to an array of chars
12326 * @size: number of chars in the array
12327 * @filename: an optional file name or URI
12328 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012329 * Create a parser context for using the XML parser in push mode.
12330 * If @buffer and @size are non-NULL, the data is used to detect
12331 * the encoding. The remaining characters will be parsed so they
12332 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012333 * To allow content encoding detection, @size should be >= 4
12334 * The value of @filename is used for fetching external entities
12335 * and error/warning reports.
12336 *
12337 * Returns the new parser context or NULL
12338 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012339
Owen Taylor3473f882001-02-23 17:55:21 +000012340xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012341xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012342 const char *chunk, int size, const char *filename) {
12343 xmlParserCtxtPtr ctxt;
12344 xmlParserInputPtr inputStream;
12345 xmlParserInputBufferPtr buf;
12346 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12347
12348 /*
12349 * plug some encoding conversion routines
12350 */
12351 if ((chunk != NULL) && (size >= 4))
12352 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12353
12354 buf = xmlAllocParserInputBuffer(enc);
12355 if (buf == NULL) return(NULL);
12356
12357 ctxt = xmlNewParserCtxt();
12358 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012359 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012360 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012361 return(NULL);
12362 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012363 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012364 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12365 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012366 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012367 xmlFreeParserInputBuffer(buf);
12368 xmlFreeParserCtxt(ctxt);
12369 return(NULL);
12370 }
Owen Taylor3473f882001-02-23 17:55:21 +000012371 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012372#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012373 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012374#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012375 xmlFree(ctxt->sax);
12376 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12377 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012378 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012379 xmlFreeParserInputBuffer(buf);
12380 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012381 return(NULL);
12382 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012383 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12384 if (sax->initialized == XML_SAX2_MAGIC)
12385 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12386 else
12387 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012388 if (user_data != NULL)
12389 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012390 }
Owen Taylor3473f882001-02-23 17:55:21 +000012391 if (filename == NULL) {
12392 ctxt->directory = NULL;
12393 } else {
12394 ctxt->directory = xmlParserGetDirectory(filename);
12395 }
12396
12397 inputStream = xmlNewInputStream(ctxt);
12398 if (inputStream == NULL) {
12399 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012400 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012401 return(NULL);
12402 }
12403
12404 if (filename == NULL)
12405 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012406 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012407 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012408 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012409 if (inputStream->filename == NULL) {
12410 xmlFreeParserCtxt(ctxt);
12411 xmlFreeParserInputBuffer(buf);
12412 return(NULL);
12413 }
12414 }
Owen Taylor3473f882001-02-23 17:55:21 +000012415 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012416 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012417 inputPush(ctxt, inputStream);
12418
William M. Brack3a1cd212005-02-11 14:35:54 +000012419 /*
12420 * If the caller didn't provide an initial 'chunk' for determining
12421 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12422 * that it can be automatically determined later
12423 */
12424 if ((size == 0) || (chunk == NULL)) {
12425 ctxt->charset = XML_CHAR_ENCODING_NONE;
12426 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012427 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12428 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012429
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012430 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012431
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012432 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012433#ifdef DEBUG_PUSH
12434 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12435#endif
12436 }
12437
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012438 if (enc != XML_CHAR_ENCODING_NONE) {
12439 xmlSwitchEncoding(ctxt, enc);
12440 }
12441
Owen Taylor3473f882001-02-23 17:55:21 +000012442 return(ctxt);
12443}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012444#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012445
12446/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012447 * xmlHaltParser:
12448 * @ctxt: an XML parser context
12449 *
12450 * Blocks further parser processing don't override error
12451 * for internal use
12452 */
12453static void
12454xmlHaltParser(xmlParserCtxtPtr ctxt) {
12455 if (ctxt == NULL)
12456 return;
12457 ctxt->instate = XML_PARSER_EOF;
12458 ctxt->disableSAX = 1;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012459 while (ctxt->inputNr > 1)
12460 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012461 if (ctxt->input != NULL) {
12462 /*
12463 * in case there was a specific allocation deallocate before
12464 * overriding base
12465 */
12466 if (ctxt->input->free != NULL) {
12467 ctxt->input->free((xmlChar *) ctxt->input->base);
12468 ctxt->input->free = NULL;
12469 }
12470 ctxt->input->cur = BAD_CAST"";
12471 ctxt->input->base = ctxt->input->cur;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012472 ctxt->input->end = ctxt->input->cur;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012473 }
12474}
12475
12476/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012477 * xmlStopParser:
12478 * @ctxt: an XML parser context
12479 *
12480 * Blocks further parser processing
12481 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012482void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012483xmlStopParser(xmlParserCtxtPtr ctxt) {
12484 if (ctxt == NULL)
12485 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012486 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012487 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012488}
12489
12490/**
Owen Taylor3473f882001-02-23 17:55:21 +000012491 * xmlCreateIOParserCtxt:
12492 * @sax: a SAX handler
12493 * @user_data: The user data returned on SAX callbacks
12494 * @ioread: an I/O read function
12495 * @ioclose: an I/O close function
12496 * @ioctx: an I/O handler
12497 * @enc: the charset encoding if known
12498 *
12499 * Create a parser context for using the XML parser with an existing
12500 * I/O stream
12501 *
12502 * Returns the new parser context or NULL
12503 */
12504xmlParserCtxtPtr
12505xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12506 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12507 void *ioctx, xmlCharEncoding enc) {
12508 xmlParserCtxtPtr ctxt;
12509 xmlParserInputPtr inputStream;
12510 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012511
Daniel Veillard42595322004-11-08 10:52:06 +000012512 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012513
12514 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012515 if (buf == NULL) {
12516 if (ioclose != NULL)
12517 ioclose(ioctx);
12518 return (NULL);
12519 }
Owen Taylor3473f882001-02-23 17:55:21 +000012520
12521 ctxt = xmlNewParserCtxt();
12522 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012523 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012524 return(NULL);
12525 }
12526 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012527#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012528 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012529#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012530 xmlFree(ctxt->sax);
12531 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12532 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012533 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012534 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012535 return(NULL);
12536 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012537 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12538 if (sax->initialized == XML_SAX2_MAGIC)
12539 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12540 else
12541 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012542 if (user_data != NULL)
12543 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012544 }
Owen Taylor3473f882001-02-23 17:55:21 +000012545
12546 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12547 if (inputStream == NULL) {
12548 xmlFreeParserCtxt(ctxt);
12549 return(NULL);
12550 }
12551 inputPush(ctxt, inputStream);
12552
12553 return(ctxt);
12554}
12555
Daniel Veillard4432df22003-09-28 18:58:27 +000012556#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012557/************************************************************************
12558 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012559 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012560 * *
12561 ************************************************************************/
12562
12563/**
12564 * xmlIOParseDTD:
12565 * @sax: the SAX handler block or NULL
12566 * @input: an Input Buffer
12567 * @enc: the charset encoding if known
12568 *
12569 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012570 *
Owen Taylor3473f882001-02-23 17:55:21 +000012571 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012572 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012573 */
12574
12575xmlDtdPtr
12576xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12577 xmlCharEncoding enc) {
12578 xmlDtdPtr ret = NULL;
12579 xmlParserCtxtPtr ctxt;
12580 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012581 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012582
12583 if (input == NULL)
12584 return(NULL);
12585
12586 ctxt = xmlNewParserCtxt();
12587 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012588 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012589 return(NULL);
12590 }
12591
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012592 /* We are loading a DTD */
12593 ctxt->options |= XML_PARSE_DTDLOAD;
12594
Owen Taylor3473f882001-02-23 17:55:21 +000012595 /*
12596 * Set-up the SAX context
12597 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012598 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012599 if (ctxt->sax != NULL)
12600 xmlFree(ctxt->sax);
12601 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012602 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012603 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012604 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012605
12606 /*
12607 * generate a parser input from the I/O handler
12608 */
12609
Daniel Veillard43caefb2003-12-07 19:32:22 +000012610 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012611 if (pinput == NULL) {
12612 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012613 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012614 xmlFreeParserCtxt(ctxt);
12615 return(NULL);
12616 }
12617
12618 /*
12619 * plug some encoding conversion routines here.
12620 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012621 if (xmlPushInput(ctxt, pinput) < 0) {
12622 if (sax != NULL) ctxt->sax = NULL;
12623 xmlFreeParserCtxt(ctxt);
12624 return(NULL);
12625 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012626 if (enc != XML_CHAR_ENCODING_NONE) {
12627 xmlSwitchEncoding(ctxt, enc);
12628 }
Owen Taylor3473f882001-02-23 17:55:21 +000012629
12630 pinput->filename = NULL;
12631 pinput->line = 1;
12632 pinput->col = 1;
12633 pinput->base = ctxt->input->cur;
12634 pinput->cur = ctxt->input->cur;
12635 pinput->free = NULL;
12636
12637 /*
12638 * let's parse that entity knowing it's an external subset.
12639 */
12640 ctxt->inSubset = 2;
12641 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012642 if (ctxt->myDoc == NULL) {
12643 xmlErrMemory(ctxt, "New Doc failed");
12644 return(NULL);
12645 }
12646 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012647 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12648 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012649
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012650 if ((enc == XML_CHAR_ENCODING_NONE) &&
12651 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012652 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012653 * Get the 4 first bytes and decode the charset
12654 * if enc != XML_CHAR_ENCODING_NONE
12655 * plug some encoding conversion routines.
12656 */
12657 start[0] = RAW;
12658 start[1] = NXT(1);
12659 start[2] = NXT(2);
12660 start[3] = NXT(3);
12661 enc = xmlDetectCharEncoding(start, 4);
12662 if (enc != XML_CHAR_ENCODING_NONE) {
12663 xmlSwitchEncoding(ctxt, enc);
12664 }
12665 }
12666
Owen Taylor3473f882001-02-23 17:55:21 +000012667 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12668
12669 if (ctxt->myDoc != NULL) {
12670 if (ctxt->wellFormed) {
12671 ret = ctxt->myDoc->extSubset;
12672 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012673 if (ret != NULL) {
12674 xmlNodePtr tmp;
12675
12676 ret->doc = NULL;
12677 tmp = ret->children;
12678 while (tmp != NULL) {
12679 tmp->doc = NULL;
12680 tmp = tmp->next;
12681 }
12682 }
Owen Taylor3473f882001-02-23 17:55:21 +000012683 } else {
12684 ret = NULL;
12685 }
12686 xmlFreeDoc(ctxt->myDoc);
12687 ctxt->myDoc = NULL;
12688 }
12689 if (sax != NULL) ctxt->sax = NULL;
12690 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012691
Owen Taylor3473f882001-02-23 17:55:21 +000012692 return(ret);
12693}
12694
12695/**
12696 * xmlSAXParseDTD:
12697 * @sax: the SAX handler block
12698 * @ExternalID: a NAME* containing the External ID of the DTD
12699 * @SystemID: a NAME* containing the URL to the DTD
12700 *
12701 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012702 *
Owen Taylor3473f882001-02-23 17:55:21 +000012703 * Returns the resulting xmlDtdPtr or NULL in case of error.
12704 */
12705
12706xmlDtdPtr
12707xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12708 const xmlChar *SystemID) {
12709 xmlDtdPtr ret = NULL;
12710 xmlParserCtxtPtr ctxt;
12711 xmlParserInputPtr input = NULL;
12712 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012713 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012714
12715 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12716
12717 ctxt = xmlNewParserCtxt();
12718 if (ctxt == NULL) {
12719 return(NULL);
12720 }
12721
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012722 /* We are loading a DTD */
12723 ctxt->options |= XML_PARSE_DTDLOAD;
12724
Owen Taylor3473f882001-02-23 17:55:21 +000012725 /*
12726 * Set-up the SAX context
12727 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012728 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012729 if (ctxt->sax != NULL)
12730 xmlFree(ctxt->sax);
12731 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012732 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012733 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012734
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012735 /*
12736 * Canonicalise the system ID
12737 */
12738 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012739 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012740 xmlFreeParserCtxt(ctxt);
12741 return(NULL);
12742 }
Owen Taylor3473f882001-02-23 17:55:21 +000012743
12744 /*
12745 * Ask the Entity resolver to load the damn thing
12746 */
12747
12748 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012749 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12750 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012751 if (input == NULL) {
12752 if (sax != NULL) ctxt->sax = NULL;
12753 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012754 if (systemIdCanonic != NULL)
12755 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012756 return(NULL);
12757 }
12758
12759 /*
12760 * plug some encoding conversion routines here.
12761 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012762 if (xmlPushInput(ctxt, input) < 0) {
12763 if (sax != NULL) ctxt->sax = NULL;
12764 xmlFreeParserCtxt(ctxt);
12765 if (systemIdCanonic != NULL)
12766 xmlFree(systemIdCanonic);
12767 return(NULL);
12768 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012769 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12770 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12771 xmlSwitchEncoding(ctxt, enc);
12772 }
Owen Taylor3473f882001-02-23 17:55:21 +000012773
12774 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012775 input->filename = (char *) systemIdCanonic;
12776 else
12777 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012778 input->line = 1;
12779 input->col = 1;
12780 input->base = ctxt->input->cur;
12781 input->cur = ctxt->input->cur;
12782 input->free = NULL;
12783
12784 /*
12785 * let's parse that entity knowing it's an external subset.
12786 */
12787 ctxt->inSubset = 2;
12788 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012789 if (ctxt->myDoc == NULL) {
12790 xmlErrMemory(ctxt, "New Doc failed");
12791 if (sax != NULL) ctxt->sax = NULL;
12792 xmlFreeParserCtxt(ctxt);
12793 return(NULL);
12794 }
12795 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012796 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12797 ExternalID, SystemID);
12798 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12799
12800 if (ctxt->myDoc != NULL) {
12801 if (ctxt->wellFormed) {
12802 ret = ctxt->myDoc->extSubset;
12803 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012804 if (ret != NULL) {
12805 xmlNodePtr tmp;
12806
12807 ret->doc = NULL;
12808 tmp = ret->children;
12809 while (tmp != NULL) {
12810 tmp->doc = NULL;
12811 tmp = tmp->next;
12812 }
12813 }
Owen Taylor3473f882001-02-23 17:55:21 +000012814 } else {
12815 ret = NULL;
12816 }
12817 xmlFreeDoc(ctxt->myDoc);
12818 ctxt->myDoc = NULL;
12819 }
12820 if (sax != NULL) ctxt->sax = NULL;
12821 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012822
Owen Taylor3473f882001-02-23 17:55:21 +000012823 return(ret);
12824}
12825
Daniel Veillard4432df22003-09-28 18:58:27 +000012826
Owen Taylor3473f882001-02-23 17:55:21 +000012827/**
12828 * xmlParseDTD:
12829 * @ExternalID: a NAME* containing the External ID of the DTD
12830 * @SystemID: a NAME* containing the URL to the DTD
12831 *
12832 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012833 *
Owen Taylor3473f882001-02-23 17:55:21 +000012834 * Returns the resulting xmlDtdPtr or NULL in case of error.
12835 */
12836
12837xmlDtdPtr
12838xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12839 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12840}
Daniel Veillard4432df22003-09-28 18:58:27 +000012841#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012842
12843/************************************************************************
12844 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012845 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012846 * *
12847 ************************************************************************/
12848
12849/**
Owen Taylor3473f882001-02-23 17:55:21 +000012850 * xmlParseCtxtExternalEntity:
12851 * @ctx: the existing parsing context
12852 * @URL: the URL for the entity to load
12853 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012854 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012855 *
12856 * Parse an external general entity within an existing parsing context
12857 * An external general parsed entity is well-formed if it matches the
12858 * production labeled extParsedEnt.
12859 *
12860 * [78] extParsedEnt ::= TextDecl? content
12861 *
12862 * Returns 0 if the entity is well formed, -1 in case of args problem and
12863 * the parser error code otherwise
12864 */
12865
12866int
12867xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012868 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012869 xmlParserCtxtPtr ctxt;
12870 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012871 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012872 xmlSAXHandlerPtr oldsax = NULL;
12873 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012874 xmlChar start[4];
12875 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012876
Daniel Veillardce682bc2004-11-05 17:22:25 +000012877 if (ctx == NULL) return(-1);
12878
Daniel Veillard0161e632008-08-28 15:36:32 +000012879 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12880 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012881 return(XML_ERR_ENTITY_LOOP);
12882 }
12883
Daniel Veillardcda96922001-08-21 10:56:31 +000012884 if (lst != NULL)
12885 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012886 if ((URL == NULL) && (ID == NULL))
12887 return(-1);
12888 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12889 return(-1);
12890
Rob Richards798743a2009-06-19 13:54:25 -040012891 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012892 if (ctxt == NULL) {
12893 return(-1);
12894 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012895
Owen Taylor3473f882001-02-23 17:55:21 +000012896 oldsax = ctxt->sax;
12897 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012898 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012899 newDoc = xmlNewDoc(BAD_CAST "1.0");
12900 if (newDoc == NULL) {
12901 xmlFreeParserCtxt(ctxt);
12902 return(-1);
12903 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012904 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012905 if (ctx->myDoc->dict) {
12906 newDoc->dict = ctx->myDoc->dict;
12907 xmlDictReference(newDoc->dict);
12908 }
Owen Taylor3473f882001-02-23 17:55:21 +000012909 if (ctx->myDoc != NULL) {
12910 newDoc->intSubset = ctx->myDoc->intSubset;
12911 newDoc->extSubset = ctx->myDoc->extSubset;
12912 }
12913 if (ctx->myDoc->URL != NULL) {
12914 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12915 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012916 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12917 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012918 ctxt->sax = oldsax;
12919 xmlFreeParserCtxt(ctxt);
12920 newDoc->intSubset = NULL;
12921 newDoc->extSubset = NULL;
12922 xmlFreeDoc(newDoc);
12923 return(-1);
12924 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012925 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012926 nodePush(ctxt, newDoc->children);
12927 if (ctx->myDoc == NULL) {
12928 ctxt->myDoc = newDoc;
12929 } else {
12930 ctxt->myDoc = ctx->myDoc;
12931 newDoc->children->doc = ctx->myDoc;
12932 }
12933
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012934 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012935 * Get the 4 first bytes and decode the charset
12936 * if enc != XML_CHAR_ENCODING_NONE
12937 * plug some encoding conversion routines.
12938 */
12939 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012940 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12941 start[0] = RAW;
12942 start[1] = NXT(1);
12943 start[2] = NXT(2);
12944 start[3] = NXT(3);
12945 enc = xmlDetectCharEncoding(start, 4);
12946 if (enc != XML_CHAR_ENCODING_NONE) {
12947 xmlSwitchEncoding(ctxt, enc);
12948 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012949 }
12950
Owen Taylor3473f882001-02-23 17:55:21 +000012951 /*
12952 * Parse a possible text declaration first
12953 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012954 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012955 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012956 /*
12957 * An XML-1.0 document can't reference an entity not XML-1.0
12958 */
12959 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12960 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012961 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012962 "Version mismatch between document and entity\n");
12963 }
Owen Taylor3473f882001-02-23 17:55:21 +000012964 }
12965
12966 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012967 * If the user provided its own SAX callbacks then reuse the
12968 * useData callback field, otherwise the expected setup in a
12969 * DOM builder is to have userData == ctxt
12970 */
12971 if (ctx->userData == ctx)
12972 ctxt->userData = ctxt;
12973 else
12974 ctxt->userData = ctx->userData;
12975
12976 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012977 * Doing validity checking on chunk doesn't make sense
12978 */
12979 ctxt->instate = XML_PARSER_CONTENT;
12980 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012981 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012982 ctxt->loadsubset = ctx->loadsubset;
12983 ctxt->depth = ctx->depth + 1;
12984 ctxt->replaceEntities = ctx->replaceEntities;
12985 if (ctxt->validate) {
12986 ctxt->vctxt.error = ctx->vctxt.error;
12987 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012988 } else {
12989 ctxt->vctxt.error = NULL;
12990 ctxt->vctxt.warning = NULL;
12991 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012992 ctxt->vctxt.nodeTab = NULL;
12993 ctxt->vctxt.nodeNr = 0;
12994 ctxt->vctxt.nodeMax = 0;
12995 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012996 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12997 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012998 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12999 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13000 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013001 ctxt->dictNames = ctx->dictNames;
13002 ctxt->attsDefault = ctx->attsDefault;
13003 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000013004 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013005
13006 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013007
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013008 ctx->validate = ctxt->validate;
13009 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013010 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013011 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013012 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013013 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013014 }
13015 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013016 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013017 }
13018
13019 if (!ctxt->wellFormed) {
13020 if (ctxt->errNo == 0)
13021 ret = 1;
13022 else
13023 ret = ctxt->errNo;
13024 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013025 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013026 xmlNodePtr cur;
13027
13028 /*
13029 * Return the newly created nodeset after unlinking it from
13030 * they pseudo parent.
13031 */
13032 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013033 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013034 while (cur != NULL) {
13035 cur->parent = NULL;
13036 cur = cur->next;
13037 }
13038 newDoc->children->children = NULL;
13039 }
13040 ret = 0;
13041 }
13042 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013043 ctxt->dict = NULL;
13044 ctxt->attsDefault = NULL;
13045 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013046 xmlFreeParserCtxt(ctxt);
13047 newDoc->intSubset = NULL;
13048 newDoc->extSubset = NULL;
13049 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013050
Owen Taylor3473f882001-02-23 17:55:21 +000013051 return(ret);
13052}
13053
13054/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013055 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013056 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013057 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013058 * @sax: the SAX handler bloc (possibly NULL)
13059 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13060 * @depth: Used for loop detection, use 0
13061 * @URL: the URL for the entity to load
13062 * @ID: the System ID for the entity to load
13063 * @list: the return value for the set of parsed nodes
13064 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013065 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013066 *
13067 * Returns 0 if the entity is well formed, -1 in case of args problem and
13068 * the parser error code otherwise
13069 */
13070
Daniel Veillard7d515752003-09-26 19:12:37 +000013071static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013072xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13073 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013074 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013075 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013076 xmlParserCtxtPtr ctxt;
13077 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013078 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013079 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013080 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013081 xmlChar start[4];
13082 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013083
Daniel Veillard0161e632008-08-28 15:36:32 +000013084 if (((depth > 40) &&
13085 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13086 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013087 return(XML_ERR_ENTITY_LOOP);
13088 }
13089
Owen Taylor3473f882001-02-23 17:55:21 +000013090 if (list != NULL)
13091 *list = NULL;
13092 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013093 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013094 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013095 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013096
13097
Rob Richards9c0aa472009-03-26 18:10:19 +000013098 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013099 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013100 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013101 if (oldctxt != NULL) {
13102 ctxt->_private = oldctxt->_private;
13103 ctxt->loadsubset = oldctxt->loadsubset;
13104 ctxt->validate = oldctxt->validate;
13105 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013106 ctxt->record_info = oldctxt->record_info;
13107 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13108 ctxt->node_seq.length = oldctxt->node_seq.length;
13109 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013110 } else {
13111 /*
13112 * Doing validity checking on chunk without context
13113 * doesn't make sense
13114 */
13115 ctxt->_private = NULL;
13116 ctxt->validate = 0;
13117 ctxt->external = 2;
13118 ctxt->loadsubset = 0;
13119 }
Owen Taylor3473f882001-02-23 17:55:21 +000013120 if (sax != NULL) {
13121 oldsax = ctxt->sax;
13122 ctxt->sax = sax;
13123 if (user_data != NULL)
13124 ctxt->userData = user_data;
13125 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013126 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013127 newDoc = xmlNewDoc(BAD_CAST "1.0");
13128 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013129 ctxt->node_seq.maximum = 0;
13130 ctxt->node_seq.length = 0;
13131 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013132 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013133 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013134 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013135 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013136 newDoc->intSubset = doc->intSubset;
13137 newDoc->extSubset = doc->extSubset;
13138 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013139 xmlDictReference(newDoc->dict);
13140
Owen Taylor3473f882001-02-23 17:55:21 +000013141 if (doc->URL != NULL) {
13142 newDoc->URL = xmlStrdup(doc->URL);
13143 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013144 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13145 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013146 if (sax != NULL)
13147 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013148 ctxt->node_seq.maximum = 0;
13149 ctxt->node_seq.length = 0;
13150 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013151 xmlFreeParserCtxt(ctxt);
13152 newDoc->intSubset = NULL;
13153 newDoc->extSubset = NULL;
13154 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013155 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013156 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013157 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013158 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013159 ctxt->myDoc = doc;
13160 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013161
Daniel Veillard0161e632008-08-28 15:36:32 +000013162 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013163 * Get the 4 first bytes and decode the charset
13164 * if enc != XML_CHAR_ENCODING_NONE
13165 * plug some encoding conversion routines.
13166 */
13167 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013168 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13169 start[0] = RAW;
13170 start[1] = NXT(1);
13171 start[2] = NXT(2);
13172 start[3] = NXT(3);
13173 enc = xmlDetectCharEncoding(start, 4);
13174 if (enc != XML_CHAR_ENCODING_NONE) {
13175 xmlSwitchEncoding(ctxt, enc);
13176 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013177 }
13178
Owen Taylor3473f882001-02-23 17:55:21 +000013179 /*
13180 * Parse a possible text declaration first
13181 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013182 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013183 xmlParseTextDecl(ctxt);
13184 }
13185
Owen Taylor3473f882001-02-23 17:55:21 +000013186 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013187 ctxt->depth = depth;
13188
13189 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013190
Daniel Veillard561b7f82002-03-20 21:55:57 +000013191 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013192 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013193 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013194 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013195 }
13196 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013197 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013198 }
13199
13200 if (!ctxt->wellFormed) {
13201 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013202 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013203 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013204 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013205 } else {
13206 if (list != NULL) {
13207 xmlNodePtr cur;
13208
13209 /*
13210 * Return the newly created nodeset after unlinking it from
13211 * they pseudo parent.
13212 */
13213 cur = newDoc->children->children;
13214 *list = cur;
13215 while (cur != NULL) {
13216 cur->parent = NULL;
13217 cur = cur->next;
13218 }
13219 newDoc->children->children = NULL;
13220 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013221 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013222 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013223
13224 /*
13225 * Record in the parent context the number of entities replacement
13226 * done when parsing that reference.
13227 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013228 if (oldctxt != NULL)
13229 oldctxt->nbentities += ctxt->nbentities;
13230
Daniel Veillard0161e632008-08-28 15:36:32 +000013231 /*
13232 * Also record the size of the entity parsed
13233 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013234 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013235 oldctxt->sizeentities += ctxt->input->consumed;
13236 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13237 }
13238 /*
13239 * And record the last error if any
13240 */
Nick Wellnhofer3eef3f32017-06-20 16:13:57 +020013241 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
Daniel Veillard0161e632008-08-28 15:36:32 +000013242 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13243
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013244 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013245 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013246 if (oldctxt != NULL) {
13247 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13248 oldctxt->node_seq.length = ctxt->node_seq.length;
13249 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13250 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013251 ctxt->node_seq.maximum = 0;
13252 ctxt->node_seq.length = 0;
13253 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013254 xmlFreeParserCtxt(ctxt);
13255 newDoc->intSubset = NULL;
13256 newDoc->extSubset = NULL;
13257 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013258
Owen Taylor3473f882001-02-23 17:55:21 +000013259 return(ret);
13260}
13261
Daniel Veillard81273902003-09-30 00:43:48 +000013262#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013263/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013264 * xmlParseExternalEntity:
13265 * @doc: the document the chunk pertains to
13266 * @sax: the SAX handler bloc (possibly NULL)
13267 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13268 * @depth: Used for loop detection, use 0
13269 * @URL: the URL for the entity to load
13270 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013271 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013272 *
13273 * Parse an external general entity
13274 * An external general parsed entity is well-formed if it matches the
13275 * production labeled extParsedEnt.
13276 *
13277 * [78] extParsedEnt ::= TextDecl? content
13278 *
13279 * Returns 0 if the entity is well formed, -1 in case of args problem and
13280 * the parser error code otherwise
13281 */
13282
13283int
13284xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013285 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013286 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013287 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013288}
13289
13290/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013291 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013292 * @doc: the document the chunk pertains to
13293 * @sax: the SAX handler bloc (possibly NULL)
13294 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13295 * @depth: Used for loop detection, use 0
13296 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013297 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013298 *
13299 * Parse a well-balanced chunk of an XML document
13300 * called by the parser
13301 * The allowed sequence for the Well Balanced Chunk is the one defined by
13302 * the content production in the XML grammar:
13303 *
13304 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13305 *
13306 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13307 * the parser error code otherwise
13308 */
13309
13310int
13311xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013312 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013313 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13314 depth, string, lst, 0 );
13315}
Daniel Veillard81273902003-09-30 00:43:48 +000013316#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013317
13318/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013319 * xmlParseBalancedChunkMemoryInternal:
13320 * @oldctxt: the existing parsing context
13321 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13322 * @user_data: the user data field for the parser context
13323 * @lst: the return value for the set of parsed nodes
13324 *
13325 *
13326 * Parse a well-balanced chunk of an XML document
13327 * called by the parser
13328 * The allowed sequence for the Well Balanced Chunk is the one defined by
13329 * the content production in the XML grammar:
13330 *
13331 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13332 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013333 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13334 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013335 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013336 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013337 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013338 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013339static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013340xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13341 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13342 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013343 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013344 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013345 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013346 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013347 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013348 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013349 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013350#ifdef SAX2
13351 int i;
13352#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013353
Daniel Veillard0161e632008-08-28 15:36:32 +000013354 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13355 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013356 return(XML_ERR_ENTITY_LOOP);
13357 }
13358
13359
13360 if (lst != NULL)
13361 *lst = NULL;
13362 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013363 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013364
13365 size = xmlStrlen(string);
13366
13367 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013368 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013369 if (user_data != NULL)
13370 ctxt->userData = user_data;
13371 else
13372 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013373 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13374 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013375 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13376 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13377 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013378
Daniel Veillard74eaec12009-08-26 15:57:20 +020013379#ifdef SAX2
13380 /* propagate namespaces down the entity */
13381 for (i = 0;i < oldctxt->nsNr;i += 2) {
13382 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13383 }
13384#endif
13385
Daniel Veillard328f48c2002-11-15 15:24:34 +000013386 oldsax = ctxt->sax;
13387 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013388 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013389 ctxt->replaceEntities = oldctxt->replaceEntities;
13390 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013391
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013392 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013393 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013394 newDoc = xmlNewDoc(BAD_CAST "1.0");
13395 if (newDoc == NULL) {
13396 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013397 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013398 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013399 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013400 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013401 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013402 newDoc->dict = ctxt->dict;
13403 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013404 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013405 } else {
13406 ctxt->myDoc = oldctxt->myDoc;
13407 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013408 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013409 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013410 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13411 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013412 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013413 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013414 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013415 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013416 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013417 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013418 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013419 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013420 ctxt->myDoc->children = NULL;
13421 ctxt->myDoc->last = NULL;
13422 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013423 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013424 ctxt->instate = XML_PARSER_CONTENT;
13425 ctxt->depth = oldctxt->depth + 1;
13426
Daniel Veillard328f48c2002-11-15 15:24:34 +000013427 ctxt->validate = 0;
13428 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013429 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13430 /*
13431 * ID/IDREF registration will be done in xmlValidateElement below
13432 */
13433 ctxt->loadsubset |= XML_SKIP_IDS;
13434 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013435 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013436 ctxt->attsDefault = oldctxt->attsDefault;
13437 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013438
Daniel Veillard68e9e742002-11-16 15:35:11 +000013439 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013440 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013441 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013442 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013443 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013444 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013445 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013446 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013447 }
13448
13449 if (!ctxt->wellFormed) {
13450 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013451 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013452 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013453 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013454 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013455 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013456 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013457
William M. Brack7b9154b2003-09-27 19:23:50 +000013458 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013459 xmlNodePtr cur;
13460
13461 /*
13462 * Return the newly created nodeset after unlinking it from
13463 * they pseudo parent.
13464 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013465 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013466 *lst = cur;
13467 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013468#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013469 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13470 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13471 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013472 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13473 oldctxt->myDoc, cur);
13474 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013475#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013476 cur->parent = NULL;
13477 cur = cur->next;
13478 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013479 ctxt->myDoc->children->children = NULL;
13480 }
13481 if (ctxt->myDoc != NULL) {
13482 xmlFreeNode(ctxt->myDoc->children);
13483 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013484 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013485 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013486
13487 /*
13488 * Record in the parent context the number of entities replacement
13489 * done when parsing that reference.
13490 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013491 if (oldctxt != NULL)
13492 oldctxt->nbentities += ctxt->nbentities;
13493
Daniel Veillard0161e632008-08-28 15:36:32 +000013494 /*
13495 * Also record the last error if any
13496 */
13497 if (ctxt->lastError.code != XML_ERR_OK)
13498 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13499
Daniel Veillard328f48c2002-11-15 15:24:34 +000013500 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013501 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013502 ctxt->attsDefault = NULL;
13503 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013504 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013505 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013506 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013507 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013508
Daniel Veillard328f48c2002-11-15 15:24:34 +000013509 return(ret);
13510}
13511
Daniel Veillard29b17482004-08-16 00:39:03 +000013512/**
13513 * xmlParseInNodeContext:
13514 * @node: the context node
13515 * @data: the input string
13516 * @datalen: the input string length in bytes
13517 * @options: a combination of xmlParserOption
13518 * @lst: the return value for the set of parsed nodes
13519 *
13520 * Parse a well-balanced chunk of an XML document
13521 * within the context (DTD, namespaces, etc ...) of the given node.
13522 *
13523 * The allowed sequence for the data is a Well Balanced Chunk defined by
13524 * the content production in the XML grammar:
13525 *
13526 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13527 *
13528 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13529 * error code otherwise
13530 */
13531xmlParserErrors
13532xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13533 int options, xmlNodePtr *lst) {
13534#ifdef SAX2
13535 xmlParserCtxtPtr ctxt;
13536 xmlDocPtr doc = NULL;
13537 xmlNodePtr fake, cur;
13538 int nsnr = 0;
13539
13540 xmlParserErrors ret = XML_ERR_OK;
13541
13542 /*
13543 * check all input parameters, grab the document
13544 */
13545 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13546 return(XML_ERR_INTERNAL_ERROR);
13547 switch (node->type) {
13548 case XML_ELEMENT_NODE:
13549 case XML_ATTRIBUTE_NODE:
13550 case XML_TEXT_NODE:
13551 case XML_CDATA_SECTION_NODE:
13552 case XML_ENTITY_REF_NODE:
13553 case XML_PI_NODE:
13554 case XML_COMMENT_NODE:
13555 case XML_DOCUMENT_NODE:
13556 case XML_HTML_DOCUMENT_NODE:
13557 break;
13558 default:
13559 return(XML_ERR_INTERNAL_ERROR);
13560
13561 }
13562 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13563 (node->type != XML_DOCUMENT_NODE) &&
13564 (node->type != XML_HTML_DOCUMENT_NODE))
13565 node = node->parent;
13566 if (node == NULL)
13567 return(XML_ERR_INTERNAL_ERROR);
13568 if (node->type == XML_ELEMENT_NODE)
13569 doc = node->doc;
13570 else
13571 doc = (xmlDocPtr) node;
13572 if (doc == NULL)
13573 return(XML_ERR_INTERNAL_ERROR);
13574
13575 /*
13576 * allocate a context and set-up everything not related to the
13577 * node position in the tree
13578 */
13579 if (doc->type == XML_DOCUMENT_NODE)
13580 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13581#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013582 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013583 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013584 /*
13585 * When parsing in context, it makes no sense to add implied
13586 * elements like html/body/etc...
13587 */
13588 options |= HTML_PARSE_NOIMPLIED;
13589 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013590#endif
13591 else
13592 return(XML_ERR_INTERNAL_ERROR);
13593
13594 if (ctxt == NULL)
13595 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013596
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013597 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013598 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13599 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13600 * we must wait until the last moment to free the original one.
13601 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013602 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013603 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013604 xmlDictFree(ctxt->dict);
13605 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013606 } else
13607 options |= XML_PARSE_NODICT;
13608
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013609 if (doc->encoding != NULL) {
13610 xmlCharEncodingHandlerPtr hdlr;
13611
13612 if (ctxt->encoding != NULL)
13613 xmlFree((xmlChar *) ctxt->encoding);
13614 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13615
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013616 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013617 if (hdlr != NULL) {
13618 xmlSwitchToEncoding(ctxt, hdlr);
13619 } else {
13620 return(XML_ERR_UNSUPPORTED_ENCODING);
13621 }
13622 }
13623
Daniel Veillard37334572008-07-31 08:20:02 +000013624 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013625 xmlDetectSAX2(ctxt);
13626 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013627 /* parsing in context, i.e. as within existing content */
13628 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013629
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013630 fake = xmlNewComment(NULL);
13631 if (fake == NULL) {
13632 xmlFreeParserCtxt(ctxt);
13633 return(XML_ERR_NO_MEMORY);
13634 }
13635 xmlAddChild(node, fake);
13636
Daniel Veillard29b17482004-08-16 00:39:03 +000013637 if (node->type == XML_ELEMENT_NODE) {
13638 nodePush(ctxt, node);
13639 /*
13640 * initialize the SAX2 namespaces stack
13641 */
13642 cur = node;
13643 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13644 xmlNsPtr ns = cur->nsDef;
13645 const xmlChar *iprefix, *ihref;
13646
13647 while (ns != NULL) {
13648 if (ctxt->dict) {
13649 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13650 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13651 } else {
13652 iprefix = ns->prefix;
13653 ihref = ns->href;
13654 }
13655
13656 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13657 nsPush(ctxt, iprefix, ihref);
13658 nsnr++;
13659 }
13660 ns = ns->next;
13661 }
13662 cur = cur->parent;
13663 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013664 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013665
13666 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13667 /*
13668 * ID/IDREF registration will be done in xmlValidateElement below
13669 */
13670 ctxt->loadsubset |= XML_SKIP_IDS;
13671 }
13672
Daniel Veillard499cc922006-01-18 17:22:35 +000013673#ifdef LIBXML_HTML_ENABLED
13674 if (doc->type == XML_HTML_DOCUMENT_NODE)
13675 __htmlParseContent(ctxt);
13676 else
13677#endif
13678 xmlParseContent(ctxt);
13679
Daniel Veillard29b17482004-08-16 00:39:03 +000013680 nsPop(ctxt, nsnr);
13681 if ((RAW == '<') && (NXT(1) == '/')) {
13682 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13683 } else if (RAW != 0) {
13684 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13685 }
13686 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13687 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13688 ctxt->wellFormed = 0;
13689 }
13690
13691 if (!ctxt->wellFormed) {
13692 if (ctxt->errNo == 0)
13693 ret = XML_ERR_INTERNAL_ERROR;
13694 else
13695 ret = (xmlParserErrors)ctxt->errNo;
13696 } else {
13697 ret = XML_ERR_OK;
13698 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013699
Daniel Veillard29b17482004-08-16 00:39:03 +000013700 /*
13701 * Return the newly created nodeset after unlinking it from
13702 * the pseudo sibling.
13703 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013704
Daniel Veillard29b17482004-08-16 00:39:03 +000013705 cur = fake->next;
13706 fake->next = NULL;
13707 node->last = fake;
13708
13709 if (cur != NULL) {
13710 cur->prev = NULL;
13711 }
13712
13713 *lst = cur;
13714
13715 while (cur != NULL) {
13716 cur->parent = NULL;
13717 cur = cur->next;
13718 }
13719
13720 xmlUnlinkNode(fake);
13721 xmlFreeNode(fake);
13722
13723
13724 if (ret != XML_ERR_OK) {
13725 xmlFreeNodeList(*lst);
13726 *lst = NULL;
13727 }
William M. Brackc3f81342004-10-03 01:22:44 +000013728
William M. Brackb7b54de2004-10-06 16:38:01 +000013729 if (doc->dict != NULL)
13730 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013731 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013732
Daniel Veillard29b17482004-08-16 00:39:03 +000013733 return(ret);
13734#else /* !SAX2 */
13735 return(XML_ERR_INTERNAL_ERROR);
13736#endif
13737}
13738
Daniel Veillard81273902003-09-30 00:43:48 +000013739#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013740/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013741 * xmlParseBalancedChunkMemoryRecover:
13742 * @doc: the document the chunk pertains to
13743 * @sax: the SAX handler bloc (possibly NULL)
13744 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13745 * @depth: Used for loop detection, use 0
13746 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13747 * @lst: the return value for the set of parsed nodes
13748 * @recover: return nodes even if the data is broken (use 0)
13749 *
13750 *
13751 * Parse a well-balanced chunk of an XML document
13752 * called by the parser
13753 * The allowed sequence for the Well Balanced Chunk is the one defined by
13754 * the content production in the XML grammar:
13755 *
13756 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13757 *
13758 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13759 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013760 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013761 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013762 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13763 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013764 */
13765int
13766xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013767 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013768 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013769 xmlParserCtxtPtr ctxt;
13770 xmlDocPtr newDoc;
13771 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013772 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013773 int size;
13774 int ret = 0;
13775
Daniel Veillard0161e632008-08-28 15:36:32 +000013776 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013777 return(XML_ERR_ENTITY_LOOP);
13778 }
13779
13780
Daniel Veillardcda96922001-08-21 10:56:31 +000013781 if (lst != NULL)
13782 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013783 if (string == NULL)
13784 return(-1);
13785
13786 size = xmlStrlen(string);
13787
13788 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13789 if (ctxt == NULL) return(-1);
13790 ctxt->userData = ctxt;
13791 if (sax != NULL) {
13792 oldsax = ctxt->sax;
13793 ctxt->sax = sax;
13794 if (user_data != NULL)
13795 ctxt->userData = user_data;
13796 }
13797 newDoc = xmlNewDoc(BAD_CAST "1.0");
13798 if (newDoc == NULL) {
13799 xmlFreeParserCtxt(ctxt);
13800 return(-1);
13801 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013802 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013803 if ((doc != NULL) && (doc->dict != NULL)) {
13804 xmlDictFree(ctxt->dict);
13805 ctxt->dict = doc->dict;
13806 xmlDictReference(ctxt->dict);
13807 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13808 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13809 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13810 ctxt->dictNames = 1;
13811 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013812 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013813 }
Owen Taylor3473f882001-02-23 17:55:21 +000013814 if (doc != NULL) {
13815 newDoc->intSubset = doc->intSubset;
13816 newDoc->extSubset = doc->extSubset;
13817 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013818 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13819 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013820 if (sax != NULL)
13821 ctxt->sax = oldsax;
13822 xmlFreeParserCtxt(ctxt);
13823 newDoc->intSubset = NULL;
13824 newDoc->extSubset = NULL;
13825 xmlFreeDoc(newDoc);
13826 return(-1);
13827 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013828 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13829 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013830 if (doc == NULL) {
13831 ctxt->myDoc = newDoc;
13832 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013833 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013834 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013835 /* Ensure that doc has XML spec namespace */
13836 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13837 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013838 }
13839 ctxt->instate = XML_PARSER_CONTENT;
13840 ctxt->depth = depth;
13841
13842 /*
13843 * Doing validity checking on chunk doesn't make sense
13844 */
13845 ctxt->validate = 0;
13846 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013847 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013848
Daniel Veillardb39bc392002-10-26 19:29:51 +000013849 if ( doc != NULL ){
13850 content = doc->children;
13851 doc->children = NULL;
13852 xmlParseContent(ctxt);
13853 doc->children = content;
13854 }
13855 else {
13856 xmlParseContent(ctxt);
13857 }
Owen Taylor3473f882001-02-23 17:55:21 +000013858 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013859 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013860 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013861 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013862 }
13863 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013864 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013865 }
13866
13867 if (!ctxt->wellFormed) {
13868 if (ctxt->errNo == 0)
13869 ret = 1;
13870 else
13871 ret = ctxt->errNo;
13872 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013873 ret = 0;
13874 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013875
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013876 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13877 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013878
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013879 /*
13880 * Return the newly created nodeset after unlinking it from
13881 * they pseudo parent.
13882 */
13883 cur = newDoc->children->children;
13884 *lst = cur;
13885 while (cur != NULL) {
13886 xmlSetTreeDoc(cur, doc);
13887 cur->parent = NULL;
13888 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013889 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013890 newDoc->children->children = NULL;
13891 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013892
13893 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013894 ctxt->sax = oldsax;
13895 xmlFreeParserCtxt(ctxt);
13896 newDoc->intSubset = NULL;
13897 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013898 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013899 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013900
Owen Taylor3473f882001-02-23 17:55:21 +000013901 return(ret);
13902}
13903
13904/**
13905 * xmlSAXParseEntity:
13906 * @sax: the SAX handler block
13907 * @filename: the filename
13908 *
13909 * parse an XML external entity out of context and build a tree.
13910 * It use the given SAX function block to handle the parsing callback.
13911 * If sax is NULL, fallback to the default DOM tree building routines.
13912 *
13913 * [78] extParsedEnt ::= TextDecl? content
13914 *
13915 * This correspond to a "Well Balanced" chunk
13916 *
13917 * Returns the resulting document tree
13918 */
13919
13920xmlDocPtr
13921xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13922 xmlDocPtr ret;
13923 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013924
13925 ctxt = xmlCreateFileParserCtxt(filename);
13926 if (ctxt == NULL) {
13927 return(NULL);
13928 }
13929 if (sax != NULL) {
13930 if (ctxt->sax != NULL)
13931 xmlFree(ctxt->sax);
13932 ctxt->sax = sax;
13933 ctxt->userData = NULL;
13934 }
13935
Owen Taylor3473f882001-02-23 17:55:21 +000013936 xmlParseExtParsedEnt(ctxt);
13937
13938 if (ctxt->wellFormed)
13939 ret = ctxt->myDoc;
13940 else {
13941 ret = NULL;
13942 xmlFreeDoc(ctxt->myDoc);
13943 ctxt->myDoc = NULL;
13944 }
13945 if (sax != NULL)
13946 ctxt->sax = NULL;
13947 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013948
Owen Taylor3473f882001-02-23 17:55:21 +000013949 return(ret);
13950}
13951
13952/**
13953 * xmlParseEntity:
13954 * @filename: the filename
13955 *
13956 * parse an XML external entity out of context and build a tree.
13957 *
13958 * [78] extParsedEnt ::= TextDecl? content
13959 *
13960 * This correspond to a "Well Balanced" chunk
13961 *
13962 * Returns the resulting document tree
13963 */
13964
13965xmlDocPtr
13966xmlParseEntity(const char *filename) {
13967 return(xmlSAXParseEntity(NULL, filename));
13968}
Daniel Veillard81273902003-09-30 00:43:48 +000013969#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013970
13971/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013972 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013973 * @URL: the entity URL
13974 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013975 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013976 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013977 *
13978 * Create a parser context for an external entity
13979 * Automatic support for ZLIB/Compress compressed document is provided
13980 * by default if found at compile-time.
13981 *
13982 * Returns the new parser context or NULL
13983 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013984static xmlParserCtxtPtr
13985xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13986 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013987 xmlParserCtxtPtr ctxt;
13988 xmlParserInputPtr inputStream;
13989 char *directory = NULL;
13990 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013991
Owen Taylor3473f882001-02-23 17:55:21 +000013992 ctxt = xmlNewParserCtxt();
13993 if (ctxt == NULL) {
13994 return(NULL);
13995 }
13996
Daniel Veillard48247b42009-07-10 16:12:46 +020013997 if (pctx != NULL) {
13998 ctxt->options = pctx->options;
13999 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000014000 }
14001
Owen Taylor3473f882001-02-23 17:55:21 +000014002 uri = xmlBuildURI(URL, base);
14003
14004 if (uri == NULL) {
14005 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14006 if (inputStream == NULL) {
14007 xmlFreeParserCtxt(ctxt);
14008 return(NULL);
14009 }
14010
14011 inputPush(ctxt, inputStream);
14012
14013 if ((ctxt->directory == NULL) && (directory == NULL))
14014 directory = xmlParserGetDirectory((char *)URL);
14015 if ((ctxt->directory == NULL) && (directory != NULL))
14016 ctxt->directory = directory;
14017 } else {
14018 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14019 if (inputStream == NULL) {
14020 xmlFree(uri);
14021 xmlFreeParserCtxt(ctxt);
14022 return(NULL);
14023 }
14024
14025 inputPush(ctxt, inputStream);
14026
14027 if ((ctxt->directory == NULL) && (directory == NULL))
14028 directory = xmlParserGetDirectory((char *)uri);
14029 if ((ctxt->directory == NULL) && (directory != NULL))
14030 ctxt->directory = directory;
14031 xmlFree(uri);
14032 }
Owen Taylor3473f882001-02-23 17:55:21 +000014033 return(ctxt);
14034}
14035
Rob Richards9c0aa472009-03-26 18:10:19 +000014036/**
14037 * xmlCreateEntityParserCtxt:
14038 * @URL: the entity URL
14039 * @ID: the entity PUBLIC ID
14040 * @base: a possible base for the target URI
14041 *
14042 * Create a parser context for an external entity
14043 * Automatic support for ZLIB/Compress compressed document is provided
14044 * by default if found at compile-time.
14045 *
14046 * Returns the new parser context or NULL
14047 */
14048xmlParserCtxtPtr
14049xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14050 const xmlChar *base) {
14051 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14052
14053}
14054
Owen Taylor3473f882001-02-23 17:55:21 +000014055/************************************************************************
14056 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014057 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014058 * *
14059 ************************************************************************/
14060
14061/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014062 * xmlCreateURLParserCtxt:
14063 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014064 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014065 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014066 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014067 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014068 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014069 *
14070 * Returns the new parser context or NULL
14071 */
14072xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014073xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014074{
14075 xmlParserCtxtPtr ctxt;
14076 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014077 char *directory = NULL;
14078
Owen Taylor3473f882001-02-23 17:55:21 +000014079 ctxt = xmlNewParserCtxt();
14080 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014081 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014082 return(NULL);
14083 }
14084
Daniel Veillarddf292f72005-01-16 19:00:15 +000014085 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014086 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014087 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014088
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014089 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014090 if (inputStream == NULL) {
14091 xmlFreeParserCtxt(ctxt);
14092 return(NULL);
14093 }
14094
Owen Taylor3473f882001-02-23 17:55:21 +000014095 inputPush(ctxt, inputStream);
14096 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014097 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014098 if ((ctxt->directory == NULL) && (directory != NULL))
14099 ctxt->directory = directory;
14100
14101 return(ctxt);
14102}
14103
Daniel Veillard61b93382003-11-03 14:28:31 +000014104/**
14105 * xmlCreateFileParserCtxt:
14106 * @filename: the filename
14107 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014108 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014109 * Automatic support for ZLIB/Compress compressed document is provided
14110 * by default if found at compile-time.
14111 *
14112 * Returns the new parser context or NULL
14113 */
14114xmlParserCtxtPtr
14115xmlCreateFileParserCtxt(const char *filename)
14116{
14117 return(xmlCreateURLParserCtxt(filename, 0));
14118}
14119
Daniel Veillard81273902003-09-30 00:43:48 +000014120#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014121/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014122 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014123 * @sax: the SAX handler block
14124 * @filename: the filename
14125 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14126 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014127 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014128 *
14129 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14130 * compressed document is provided by default if found at compile-time.
14131 * It use the given SAX function block to handle the parsing callback.
14132 * If sax is NULL, fallback to the default DOM tree building routines.
14133 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014134 * User data (void *) is stored within the parser context in the
14135 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014136 *
Owen Taylor3473f882001-02-23 17:55:21 +000014137 * Returns the resulting document tree
14138 */
14139
14140xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014141xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14142 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014143 xmlDocPtr ret;
14144 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014145
Daniel Veillard635ef722001-10-29 11:48:19 +000014146 xmlInitParser();
14147
Owen Taylor3473f882001-02-23 17:55:21 +000014148 ctxt = xmlCreateFileParserCtxt(filename);
14149 if (ctxt == NULL) {
14150 return(NULL);
14151 }
14152 if (sax != NULL) {
14153 if (ctxt->sax != NULL)
14154 xmlFree(ctxt->sax);
14155 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014156 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014157 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014158 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014159 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014160 }
Owen Taylor3473f882001-02-23 17:55:21 +000014161
Daniel Veillard37d2d162008-03-14 10:54:00 +000014162 if (ctxt->directory == NULL)
14163 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014164
Daniel Veillarddad3f682002-11-17 16:47:27 +000014165 ctxt->recovery = recovery;
14166
Owen Taylor3473f882001-02-23 17:55:21 +000014167 xmlParseDocument(ctxt);
14168
William M. Brackc07329e2003-09-08 01:57:30 +000014169 if ((ctxt->wellFormed) || recovery) {
14170 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014171 if (ret != NULL) {
14172 if (ctxt->input->buf->compressed > 0)
14173 ret->compression = 9;
14174 else
14175 ret->compression = ctxt->input->buf->compressed;
14176 }
William M. Brackc07329e2003-09-08 01:57:30 +000014177 }
Owen Taylor3473f882001-02-23 17:55:21 +000014178 else {
14179 ret = NULL;
14180 xmlFreeDoc(ctxt->myDoc);
14181 ctxt->myDoc = NULL;
14182 }
14183 if (sax != NULL)
14184 ctxt->sax = NULL;
14185 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014186
Owen Taylor3473f882001-02-23 17:55:21 +000014187 return(ret);
14188}
14189
14190/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014191 * xmlSAXParseFile:
14192 * @sax: the SAX handler block
14193 * @filename: the filename
14194 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14195 * documents
14196 *
14197 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14198 * compressed document is provided by default if found at compile-time.
14199 * It use the given SAX function block to handle the parsing callback.
14200 * If sax is NULL, fallback to the default DOM tree building routines.
14201 *
14202 * Returns the resulting document tree
14203 */
14204
14205xmlDocPtr
14206xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14207 int recovery) {
14208 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14209}
14210
14211/**
Owen Taylor3473f882001-02-23 17:55:21 +000014212 * xmlRecoverDoc:
14213 * @cur: a pointer to an array of xmlChar
14214 *
14215 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014216 * In the case the document is not Well Formed, a attempt to build a
14217 * tree is tried anyway
14218 *
14219 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014220 */
14221
14222xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014223xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014224 return(xmlSAXParseDoc(NULL, cur, 1));
14225}
14226
14227/**
14228 * xmlParseFile:
14229 * @filename: the filename
14230 *
14231 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14232 * compressed document is provided by default if found at compile-time.
14233 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014234 * Returns the resulting document tree if the file was wellformed,
14235 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014236 */
14237
14238xmlDocPtr
14239xmlParseFile(const char *filename) {
14240 return(xmlSAXParseFile(NULL, filename, 0));
14241}
14242
14243/**
14244 * xmlRecoverFile:
14245 * @filename: the filename
14246 *
14247 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14248 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014249 * In the case the document is not Well Formed, it attempts to build
14250 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014251 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014252 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014253 */
14254
14255xmlDocPtr
14256xmlRecoverFile(const char *filename) {
14257 return(xmlSAXParseFile(NULL, filename, 1));
14258}
14259
14260
14261/**
14262 * xmlSetupParserForBuffer:
14263 * @ctxt: an XML parser context
14264 * @buffer: a xmlChar * buffer
14265 * @filename: a file name
14266 *
14267 * Setup the parser context to parse a new buffer; Clears any prior
14268 * contents from the parser context. The buffer parameter must not be
14269 * NULL, but the filename parameter can be
14270 */
14271void
14272xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14273 const char* filename)
14274{
14275 xmlParserInputPtr input;
14276
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014277 if ((ctxt == NULL) || (buffer == NULL))
14278 return;
14279
Owen Taylor3473f882001-02-23 17:55:21 +000014280 input = xmlNewInputStream(ctxt);
14281 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014282 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014283 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014284 return;
14285 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014286
Owen Taylor3473f882001-02-23 17:55:21 +000014287 xmlClearParserCtxt(ctxt);
14288 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014289 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014290 input->base = buffer;
14291 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014292 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014293 inputPush(ctxt, input);
14294}
14295
14296/**
14297 * xmlSAXUserParseFile:
14298 * @sax: a SAX handler
14299 * @user_data: The user data returned on SAX callbacks
14300 * @filename: a file name
14301 *
14302 * parse an XML file and call the given SAX handler routines.
14303 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014304 *
Owen Taylor3473f882001-02-23 17:55:21 +000014305 * Returns 0 in case of success or a error number otherwise
14306 */
14307int
14308xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14309 const char *filename) {
14310 int ret = 0;
14311 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014312
Owen Taylor3473f882001-02-23 17:55:21 +000014313 ctxt = xmlCreateFileParserCtxt(filename);
14314 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014315 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014316 xmlFree(ctxt->sax);
14317 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014318 xmlDetectSAX2(ctxt);
14319
Owen Taylor3473f882001-02-23 17:55:21 +000014320 if (user_data != NULL)
14321 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014322
Owen Taylor3473f882001-02-23 17:55:21 +000014323 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014324
Owen Taylor3473f882001-02-23 17:55:21 +000014325 if (ctxt->wellFormed)
14326 ret = 0;
14327 else {
14328 if (ctxt->errNo != 0)
14329 ret = ctxt->errNo;
14330 else
14331 ret = -1;
14332 }
14333 if (sax != NULL)
14334 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014335 if (ctxt->myDoc != NULL) {
14336 xmlFreeDoc(ctxt->myDoc);
14337 ctxt->myDoc = NULL;
14338 }
Owen Taylor3473f882001-02-23 17:55:21 +000014339 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014340
Owen Taylor3473f882001-02-23 17:55:21 +000014341 return ret;
14342}
Daniel Veillard81273902003-09-30 00:43:48 +000014343#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014344
/************************************************************************
 *									*
 *		Front ends when parsing from memory			*
 *									*
 ************************************************************************/
14350
14351/**
14352 * xmlCreateMemoryParserCtxt:
14353 * @buffer: a pointer to a char array
14354 * @size: the size of the array
14355 *
14356 * Create a parser context for an XML in-memory document.
14357 *
14358 * Returns the new parser context or NULL
14359 */
14360xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014361xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014362 xmlParserCtxtPtr ctxt;
14363 xmlParserInputPtr input;
14364 xmlParserInputBufferPtr buf;
14365
14366 if (buffer == NULL)
14367 return(NULL);
14368 if (size <= 0)
14369 return(NULL);
14370
14371 ctxt = xmlNewParserCtxt();
14372 if (ctxt == NULL)
14373 return(NULL);
14374
Daniel Veillard53350552003-09-18 13:35:51 +000014375 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014376 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014377 if (buf == NULL) {
14378 xmlFreeParserCtxt(ctxt);
14379 return(NULL);
14380 }
Owen Taylor3473f882001-02-23 17:55:21 +000014381
14382 input = xmlNewInputStream(ctxt);
14383 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014384 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014385 xmlFreeParserCtxt(ctxt);
14386 return(NULL);
14387 }
14388
14389 input->filename = NULL;
14390 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014391 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014392
14393 inputPush(ctxt, input);
14394 return(ctxt);
14395}
14396
Daniel Veillard81273902003-09-30 00:43:48 +000014397#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014398/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014399 * xmlSAXParseMemoryWithData:
14400 * @sax: the SAX handler block
14401 * @buffer: an pointer to a char array
14402 * @size: the size of the array
14403 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14404 * documents
14405 * @data: the userdata
14406 *
14407 * parse an XML in-memory block and use the given SAX function block
14408 * to handle the parsing callback. If sax is NULL, fallback to the default
14409 * DOM tree building routines.
14410 *
14411 * User data (void *) is stored within the parser context in the
14412 * context's _private member, so it is available nearly everywhere in libxml
14413 *
14414 * Returns the resulting document tree
14415 */
14416
14417xmlDocPtr
14418xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14419 int size, int recovery, void *data) {
14420 xmlDocPtr ret;
14421 xmlParserCtxtPtr ctxt;
14422
Daniel Veillardab2a7632009-07-09 08:45:03 +020014423 xmlInitParser();
14424
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014425 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14426 if (ctxt == NULL) return(NULL);
14427 if (sax != NULL) {
14428 if (ctxt->sax != NULL)
14429 xmlFree(ctxt->sax);
14430 ctxt->sax = sax;
14431 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014432 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014433 if (data!=NULL) {
14434 ctxt->_private=data;
14435 }
14436
Daniel Veillardadba5f12003-04-04 16:09:01 +000014437 ctxt->recovery = recovery;
14438
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014439 xmlParseDocument(ctxt);
14440
14441 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14442 else {
14443 ret = NULL;
14444 xmlFreeDoc(ctxt->myDoc);
14445 ctxt->myDoc = NULL;
14446 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014447 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014448 ctxt->sax = NULL;
14449 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014450
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014451 return(ret);
14452}
14453
14454/**
Owen Taylor3473f882001-02-23 17:55:21 +000014455 * xmlSAXParseMemory:
14456 * @sax: the SAX handler block
14457 * @buffer: an pointer to a char array
14458 * @size: the size of the array
14459 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14460 * documents
14461 *
14462 * parse an XML in-memory block and use the given SAX function block
14463 * to handle the parsing callback. If sax is NULL, fallback to the default
14464 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014465 *
Owen Taylor3473f882001-02-23 17:55:21 +000014466 * Returns the resulting document tree
14467 */
14468xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014469xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14470 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014471 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014472}
14473
14474/**
14475 * xmlParseMemory:
14476 * @buffer: an pointer to a char array
14477 * @size: the size of the array
14478 *
14479 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014480 *
Owen Taylor3473f882001-02-23 17:55:21 +000014481 * Returns the resulting document tree
14482 */
14483
Daniel Veillard50822cb2001-07-26 20:05:51 +000014484xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014485 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14486}
14487
14488/**
14489 * xmlRecoverMemory:
14490 * @buffer: an pointer to a char array
14491 * @size: the size of the array
14492 *
14493 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014494 * In the case the document is not Well Formed, an attempt to
14495 * build a tree is tried anyway
14496 *
14497 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014498 */
14499
Daniel Veillard50822cb2001-07-26 20:05:51 +000014500xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014501 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14502}
14503
14504/**
14505 * xmlSAXUserParseMemory:
14506 * @sax: a SAX handler
14507 * @user_data: The user data returned on SAX callbacks
14508 * @buffer: an in-memory XML document input
14509 * @size: the length of the XML document in bytes
14510 *
14511 * A better SAX parsing routine.
14512 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014513 *
Owen Taylor3473f882001-02-23 17:55:21 +000014514 * Returns 0 in case of success or a error number otherwise
14515 */
14516int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014517 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014518 int ret = 0;
14519 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014520
14521 xmlInitParser();
14522
Owen Taylor3473f882001-02-23 17:55:21 +000014523 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14524 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014525 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14526 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014527 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014528 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014529
Daniel Veillard30211a02001-04-26 09:33:18 +000014530 if (user_data != NULL)
14531 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014532
Owen Taylor3473f882001-02-23 17:55:21 +000014533 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014534
Owen Taylor3473f882001-02-23 17:55:21 +000014535 if (ctxt->wellFormed)
14536 ret = 0;
14537 else {
14538 if (ctxt->errNo != 0)
14539 ret = ctxt->errNo;
14540 else
14541 ret = -1;
14542 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014543 if (sax != NULL)
14544 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014545 if (ctxt->myDoc != NULL) {
14546 xmlFreeDoc(ctxt->myDoc);
14547 ctxt->myDoc = NULL;
14548 }
Owen Taylor3473f882001-02-23 17:55:21 +000014549 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014550
Owen Taylor3473f882001-02-23 17:55:21 +000014551 return ret;
14552}
Daniel Veillard81273902003-09-30 00:43:48 +000014553#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014554
14555/**
14556 * xmlCreateDocParserCtxt:
14557 * @cur: a pointer to an array of xmlChar
14558 *
14559 * Creates a parser context for an XML in-memory document.
14560 *
14561 * Returns the new parser context or NULL
14562 */
14563xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014564xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014565 int len;
14566
14567 if (cur == NULL)
14568 return(NULL);
14569 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014570 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014571}
14572
Daniel Veillard81273902003-09-30 00:43:48 +000014573#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014574/**
14575 * xmlSAXParseDoc:
14576 * @sax: the SAX handler block
14577 * @cur: a pointer to an array of xmlChar
14578 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14579 * documents
14580 *
14581 * parse an XML in-memory document and build a tree.
14582 * It use the given SAX function block to handle the parsing callback.
14583 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014584 *
Owen Taylor3473f882001-02-23 17:55:21 +000014585 * Returns the resulting document tree
14586 */
14587
14588xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014589xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014590 xmlDocPtr ret;
14591 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014592 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014593
Daniel Veillard38936062004-11-04 17:45:11 +000014594 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014595
14596
14597 ctxt = xmlCreateDocParserCtxt(cur);
14598 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014599 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014600 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014601 ctxt->sax = sax;
14602 ctxt->userData = NULL;
14603 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014604 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014605
14606 xmlParseDocument(ctxt);
14607 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14608 else {
14609 ret = NULL;
14610 xmlFreeDoc(ctxt->myDoc);
14611 ctxt->myDoc = NULL;
14612 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014613 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014614 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014615 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014616
Owen Taylor3473f882001-02-23 17:55:21 +000014617 return(ret);
14618}
14619
14620/**
14621 * xmlParseDoc:
14622 * @cur: a pointer to an array of xmlChar
14623 *
14624 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014625 *
Owen Taylor3473f882001-02-23 17:55:21 +000014626 * Returns the resulting document tree
14627 */
14628
14629xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014630xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014631 return(xmlSAXParseDoc(NULL, cur, 0));
14632}
Daniel Veillard81273902003-09-30 00:43:48 +000014633#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014634
Daniel Veillard81273902003-09-30 00:43:48 +000014635#ifdef LIBXML_LEGACY_ENABLED
/************************************************************************
 *									*
 *	Specific function to keep track of entities references		*
 *	and used by the XSLT debugger					*
 *									*
 ************************************************************************/
14642
14643static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14644
14645/**
14646 * xmlAddEntityReference:
14647 * @ent : A valid entity
14648 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014649 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014650 *
14651 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14652 */
14653static void
14654xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14655 xmlNodePtr lastNode)
14656{
14657 if (xmlEntityRefFunc != NULL) {
14658 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14659 }
14660}
14661
14662
14663/**
14664 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014665 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014666 *
14667 * Set the function to call call back when a xml reference has been made
14668 */
14669void
14670xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14671{
14672 xmlEntityRefFunc = func;
14673}
Daniel Veillard81273902003-09-30 00:43:48 +000014674#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014675
/************************************************************************
 *									*
 *				Miscellaneous				*
 *									*
 ************************************************************************/
14681
14682#ifdef LIBXML_XPATH_ENABLED
14683#include <libxml/xpath.h>
14684#endif
14685
Daniel Veillardffa3c742005-07-21 13:24:09 +000014686extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014687static int xmlParserInitialized = 0;
14688
14689/**
14690 * xmlInitParser:
14691 *
14692 * Initialization function for the XML parser.
14693 * This is not reentrant. Call once before processing in case of
14694 * use in multithreaded programs.
14695 */
14696
14697void
14698xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014699 if (xmlParserInitialized != 0)
14700 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014701
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014702#ifdef LIBXML_THREAD_ENABLED
14703 __xmlGlobalInitMutexLock();
14704 if (xmlParserInitialized == 0) {
14705#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014706 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014707 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014708 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14709 (xmlGenericError == NULL))
14710 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014711 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014712 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014713 xmlInitCharEncodingHandlers();
14714 xmlDefaultSAXHandlerInit();
14715 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014716#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014717 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014718#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014719#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014720 htmlInitAutoClose();
14721 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014722#endif
14723#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014724 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014725#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014726 xmlParserInitialized = 1;
14727#ifdef LIBXML_THREAD_ENABLED
14728 }
14729 __xmlGlobalInitMutexUnlock();
14730#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014731}
14732
14733/**
14734 * xmlCleanupParser:
14735 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014736 * This function name is somewhat misleading. It does not clean up
14737 * parser state, it cleans up memory allocated by the library itself.
14738 * It is a cleanup function for the XML library. It tries to reclaim all
14739 * related global memory allocated for the library processing.
14740 * It doesn't deallocate any document related memory. One should
14741 * call xmlCleanupParser() only when the process has finished using
14742 * the library and all XML/HTML documents built with it.
14743 * See also xmlInitParser() which has the opposite function of preparing
14744 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014745 *
14746 * WARNING: if your application is multithreaded or has plugin support
14747 * calling this may crash the application if another thread or
14748 * a plugin is still using libxml2. It's sometimes very hard to
14749 * guess if libxml2 is in use in the application, some libraries
14750 * or plugins may use it without notice. In case of doubt abstain
14751 * from calling this function or do it just before calling exit()
14752 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014753 */
14754
14755void
14756xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014757 if (!xmlParserInitialized)
14758 return;
14759
Owen Taylor3473f882001-02-23 17:55:21 +000014760 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014761#ifdef LIBXML_CATALOG_ENABLED
14762 xmlCatalogCleanup();
14763#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014764 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014765 xmlCleanupInputCallbacks();
14766#ifdef LIBXML_OUTPUT_ENABLED
14767 xmlCleanupOutputCallbacks();
14768#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014769#ifdef LIBXML_SCHEMAS_ENABLED
14770 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014771 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014772#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014773 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014774 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014775 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014776 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014777 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014778}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014779
/************************************************************************
 *									*
 *	New set (2.6.0) of simpler and more flexible APIs		*
 *									*
 ************************************************************************/
14785
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is used; a NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a
 * single statement and can be used safely inside if/else; the caller
 * supplies the terminating semicolon. Note that @str is evaluated more
 * than once, so it must not have side effects.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14797
14798/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014799 * xmlCtxtReset:
14800 * @ctxt: an XML parser context
14801 *
14802 * Reset a parser context
14803 */
14804void
14805xmlCtxtReset(xmlParserCtxtPtr ctxt)
14806{
14807 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014808 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014809
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014810 if (ctxt == NULL)
14811 return;
14812
14813 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014814
14815 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14816 xmlFreeInputStream(input);
14817 }
14818 ctxt->inputNr = 0;
14819 ctxt->input = NULL;
14820
14821 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014822 if (ctxt->spaceTab != NULL) {
14823 ctxt->spaceTab[0] = -1;
14824 ctxt->space = &ctxt->spaceTab[0];
14825 } else {
14826 ctxt->space = NULL;
14827 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014828
14829
14830 ctxt->nodeNr = 0;
14831 ctxt->node = NULL;
14832
14833 ctxt->nameNr = 0;
14834 ctxt->name = NULL;
14835
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014836 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014837 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014838 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014839 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014840 DICT_FREE(ctxt->directory);
14841 ctxt->directory = NULL;
14842 DICT_FREE(ctxt->extSubURI);
14843 ctxt->extSubURI = NULL;
14844 DICT_FREE(ctxt->extSubSystem);
14845 ctxt->extSubSystem = NULL;
14846 if (ctxt->myDoc != NULL)
14847 xmlFreeDoc(ctxt->myDoc);
14848 ctxt->myDoc = NULL;
14849
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014850 ctxt->standalone = -1;
14851 ctxt->hasExternalSubset = 0;
14852 ctxt->hasPErefs = 0;
14853 ctxt->html = 0;
14854 ctxt->external = 0;
14855 ctxt->instate = XML_PARSER_START;
14856 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014857
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014858 ctxt->wellFormed = 1;
14859 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014860 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014861 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014862#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014863 ctxt->vctxt.userData = ctxt;
14864 ctxt->vctxt.error = xmlParserValidityError;
14865 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014866#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014867 ctxt->record_info = 0;
14868 ctxt->nbChars = 0;
14869 ctxt->checkIndex = 0;
14870 ctxt->inSubset = 0;
14871 ctxt->errNo = XML_ERR_OK;
14872 ctxt->depth = 0;
14873 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14874 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014875 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014876 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014877 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014878 xmlInitNodeInfoSeq(&ctxt->node_seq);
14879
14880 if (ctxt->attsDefault != NULL) {
14881 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14882 ctxt->attsDefault = NULL;
14883 }
14884 if (ctxt->attsSpecial != NULL) {
14885 xmlHashFree(ctxt->attsSpecial, NULL);
14886 ctxt->attsSpecial = NULL;
14887 }
14888
Daniel Veillard4432df22003-09-28 18:58:27 +000014889#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014890 if (ctxt->catalogs != NULL)
14891 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014892#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014893 if (ctxt->lastError.code != XML_ERR_OK)
14894 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014895}
14896
14897/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014898 * xmlCtxtResetPush:
14899 * @ctxt: an XML parser context
14900 * @chunk: a pointer to an array of chars
14901 * @size: number of chars in the array
14902 * @filename: an optional file name or URI
14903 * @encoding: the document encoding, or NULL
14904 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014905 * Reset a push parser context
14906 *
14907 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014908 */
14909int
14910xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14911 int size, const char *filename, const char *encoding)
14912{
14913 xmlParserInputPtr inputStream;
14914 xmlParserInputBufferPtr buf;
14915 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14916
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014917 if (ctxt == NULL)
14918 return(1);
14919
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014920 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14921 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14922
14923 buf = xmlAllocParserInputBuffer(enc);
14924 if (buf == NULL)
14925 return(1);
14926
14927 if (ctxt == NULL) {
14928 xmlFreeParserInputBuffer(buf);
14929 return(1);
14930 }
14931
14932 xmlCtxtReset(ctxt);
14933
14934 if (ctxt->pushTab == NULL) {
14935 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14936 sizeof(xmlChar *));
14937 if (ctxt->pushTab == NULL) {
14938 xmlErrMemory(ctxt, NULL);
14939 xmlFreeParserInputBuffer(buf);
14940 return(1);
14941 }
14942 }
14943
14944 if (filename == NULL) {
14945 ctxt->directory = NULL;
14946 } else {
14947 ctxt->directory = xmlParserGetDirectory(filename);
14948 }
14949
14950 inputStream = xmlNewInputStream(ctxt);
14951 if (inputStream == NULL) {
14952 xmlFreeParserInputBuffer(buf);
14953 return(1);
14954 }
14955
14956 if (filename == NULL)
14957 inputStream->filename = NULL;
14958 else
14959 inputStream->filename = (char *)
14960 xmlCanonicPath((const xmlChar *) filename);
14961 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014962 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014963
14964 inputPush(ctxt, inputStream);
14965
14966 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14967 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014968 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14969 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014970
14971 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14972
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014973 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014974#ifdef DEBUG_PUSH
14975 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14976#endif
14977 }
14978
14979 if (encoding != NULL) {
14980 xmlCharEncodingHandlerPtr hdlr;
14981
Daniel Veillard37334572008-07-31 08:20:02 +000014982 if (ctxt->encoding != NULL)
14983 xmlFree((xmlChar *) ctxt->encoding);
14984 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14985
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014986 hdlr = xmlFindCharEncodingHandler(encoding);
14987 if (hdlr != NULL) {
14988 xmlSwitchToEncoding(ctxt, hdlr);
14989 } else {
14990 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14991 "Unsupported encoding %s\n", BAD_CAST encoding);
14992 }
14993 } else if (enc != XML_CHAR_ENCODING_NONE) {
14994 xmlSwitchEncoding(ctxt, enc);
14995 }
14996
14997 return(0);
14998}
14999
Daniel Veillard37334572008-07-31 08:20:02 +000015000
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015001/**
Daniel Veillard37334572008-07-31 08:20:02 +000015002 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015003 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015004 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015005 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015006 *
15007 * Applies the options to the parser context
15008 *
15009 * Returns 0 in case of success, the set of unknown or unimplemented options
15010 * in case of error.
15011 */
Daniel Veillard37334572008-07-31 08:20:02 +000015012static int
15013xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015014{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015015 if (ctxt == NULL)
15016 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015017 if (encoding != NULL) {
15018 if (ctxt->encoding != NULL)
15019 xmlFree((xmlChar *) ctxt->encoding);
15020 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15021 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015022 if (options & XML_PARSE_RECOVER) {
15023 ctxt->recovery = 1;
15024 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015025 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015026 } else
15027 ctxt->recovery = 0;
15028 if (options & XML_PARSE_DTDLOAD) {
15029 ctxt->loadsubset = XML_DETECT_IDS;
15030 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015031 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015032 } else
15033 ctxt->loadsubset = 0;
15034 if (options & XML_PARSE_DTDATTR) {
15035 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15036 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015037 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015038 }
15039 if (options & XML_PARSE_NOENT) {
15040 ctxt->replaceEntities = 1;
15041 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15042 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015043 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015044 } else
15045 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015046 if (options & XML_PARSE_PEDANTIC) {
15047 ctxt->pedantic = 1;
15048 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015049 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015050 } else
15051 ctxt->pedantic = 0;
15052 if (options & XML_PARSE_NOBLANKS) {
15053 ctxt->keepBlanks = 0;
15054 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15055 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015056 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015057 } else
15058 ctxt->keepBlanks = 1;
15059 if (options & XML_PARSE_DTDVALID) {
15060 ctxt->validate = 1;
15061 if (options & XML_PARSE_NOWARNING)
15062 ctxt->vctxt.warning = NULL;
15063 if (options & XML_PARSE_NOERROR)
15064 ctxt->vctxt.error = NULL;
15065 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015066 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015067 } else
15068 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015069 if (options & XML_PARSE_NOWARNING) {
15070 ctxt->sax->warning = NULL;
15071 options -= XML_PARSE_NOWARNING;
15072 }
15073 if (options & XML_PARSE_NOERROR) {
15074 ctxt->sax->error = NULL;
15075 ctxt->sax->fatalError = NULL;
15076 options -= XML_PARSE_NOERROR;
15077 }
Daniel Veillard81273902003-09-30 00:43:48 +000015078#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015079 if (options & XML_PARSE_SAX1) {
15080 ctxt->sax->startElement = xmlSAX2StartElement;
15081 ctxt->sax->endElement = xmlSAX2EndElement;
15082 ctxt->sax->startElementNs = NULL;
15083 ctxt->sax->endElementNs = NULL;
15084 ctxt->sax->initialized = 1;
15085 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015086 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015087 }
Daniel Veillard81273902003-09-30 00:43:48 +000015088#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015089 if (options & XML_PARSE_NODICT) {
15090 ctxt->dictNames = 0;
15091 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015092 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015093 } else {
15094 ctxt->dictNames = 1;
15095 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015096 if (options & XML_PARSE_NOCDATA) {
15097 ctxt->sax->cdataBlock = NULL;
15098 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015099 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015100 }
15101 if (options & XML_PARSE_NSCLEAN) {
15102 ctxt->options |= XML_PARSE_NSCLEAN;
15103 options -= XML_PARSE_NSCLEAN;
15104 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015105 if (options & XML_PARSE_NONET) {
15106 ctxt->options |= XML_PARSE_NONET;
15107 options -= XML_PARSE_NONET;
15108 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015109 if (options & XML_PARSE_COMPACT) {
15110 ctxt->options |= XML_PARSE_COMPACT;
15111 options -= XML_PARSE_COMPACT;
15112 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015113 if (options & XML_PARSE_OLD10) {
15114 ctxt->options |= XML_PARSE_OLD10;
15115 options -= XML_PARSE_OLD10;
15116 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015117 if (options & XML_PARSE_NOBASEFIX) {
15118 ctxt->options |= XML_PARSE_NOBASEFIX;
15119 options -= XML_PARSE_NOBASEFIX;
15120 }
15121 if (options & XML_PARSE_HUGE) {
15122 ctxt->options |= XML_PARSE_HUGE;
15123 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015124 if (ctxt->dict != NULL)
15125 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015126 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015127 if (options & XML_PARSE_OLDSAX) {
15128 ctxt->options |= XML_PARSE_OLDSAX;
15129 options -= XML_PARSE_OLDSAX;
15130 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015131 if (options & XML_PARSE_IGNORE_ENC) {
15132 ctxt->options |= XML_PARSE_IGNORE_ENC;
15133 options -= XML_PARSE_IGNORE_ENC;
15134 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015135 if (options & XML_PARSE_BIG_LINES) {
15136 ctxt->options |= XML_PARSE_BIG_LINES;
15137 options -= XML_PARSE_BIG_LINES;
15138 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015139 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015140 return (options);
15141}
15142
15143/**
Daniel Veillard37334572008-07-31 08:20:02 +000015144 * xmlCtxtUseOptions:
15145 * @ctxt: an XML parser context
15146 * @options: a combination of xmlParserOption
15147 *
15148 * Applies the options to the parser context
15149 *
15150 * Returns 0 in case of success, the set of unknown or unimplemented options
15151 * in case of error.
15152 */
15153int
15154xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15155{
15156 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15157}
15158
15159/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015160 * xmlDoRead:
15161 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015162 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015163 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015164 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015165 * @reuse: keep the context for reuse
15166 *
15167 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015168 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015169 * Returns the resulting document tree or NULL
15170 */
15171static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015172xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15173 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015174{
15175 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015176
15177 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015178 if (encoding != NULL) {
15179 xmlCharEncodingHandlerPtr hdlr;
15180
15181 hdlr = xmlFindCharEncodingHandler(encoding);
15182 if (hdlr != NULL)
15183 xmlSwitchToEncoding(ctxt, hdlr);
15184 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015185 if ((URL != NULL) && (ctxt->input != NULL) &&
15186 (ctxt->input->filename == NULL))
15187 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015188 xmlParseDocument(ctxt);
15189 if ((ctxt->wellFormed) || ctxt->recovery)
15190 ret = ctxt->myDoc;
15191 else {
15192 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015193 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015194 xmlFreeDoc(ctxt->myDoc);
15195 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015196 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015197 ctxt->myDoc = NULL;
15198 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015199 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015200 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015201
15202 return (ret);
15203}
15204
15205/**
15206 * xmlReadDoc:
15207 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015208 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015209 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015210 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015211 *
15212 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015213 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015214 * Returns the resulting document tree
15215 */
15216xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015217xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015218{
15219 xmlParserCtxtPtr ctxt;
15220
15221 if (cur == NULL)
15222 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015223 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015224
15225 ctxt = xmlCreateDocParserCtxt(cur);
15226 if (ctxt == NULL)
15227 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015228 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015229}
15230
15231/**
15232 * xmlReadFile:
15233 * @filename: a file or URL
15234 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015235 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015236 *
15237 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015238 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015239 * Returns the resulting document tree
15240 */
15241xmlDocPtr
15242xmlReadFile(const char *filename, const char *encoding, int options)
15243{
15244 xmlParserCtxtPtr ctxt;
15245
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015246 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015247 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015248 if (ctxt == NULL)
15249 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015250 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015251}
15252
15253/**
15254 * xmlReadMemory:
15255 * @buffer: a pointer to a char array
15256 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015257 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015258 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015259 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015260 *
15261 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015262 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015263 * Returns the resulting document tree
15264 */
15265xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015266xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015267{
15268 xmlParserCtxtPtr ctxt;
15269
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015270 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015271 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15272 if (ctxt == NULL)
15273 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015274 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015275}
15276
15277/**
15278 * xmlReadFd:
15279 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015280 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015281 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015282 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015283 *
15284 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015285 * NOTE that the file descriptor will not be closed when the
15286 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015287 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015288 * Returns the resulting document tree
15289 */
15290xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015291xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015292{
15293 xmlParserCtxtPtr ctxt;
15294 xmlParserInputBufferPtr input;
15295 xmlParserInputPtr stream;
15296
15297 if (fd < 0)
15298 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015299 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015300
15301 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15302 if (input == NULL)
15303 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015304 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015305 ctxt = xmlNewParserCtxt();
15306 if (ctxt == NULL) {
15307 xmlFreeParserInputBuffer(input);
15308 return (NULL);
15309 }
15310 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15311 if (stream == NULL) {
15312 xmlFreeParserInputBuffer(input);
15313 xmlFreeParserCtxt(ctxt);
15314 return (NULL);
15315 }
15316 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015317 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015318}
15319
15320/**
15321 * xmlReadIO:
15322 * @ioread: an I/O read function
15323 * @ioclose: an I/O close function
15324 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015325 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015326 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015327 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015328 *
15329 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015330 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015331 * Returns the resulting document tree
15332 */
15333xmlDocPtr
15334xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015335 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015336{
15337 xmlParserCtxtPtr ctxt;
15338 xmlParserInputBufferPtr input;
15339 xmlParserInputPtr stream;
15340
15341 if (ioread == NULL)
15342 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015343 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015344
15345 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15346 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015347 if (input == NULL) {
15348 if (ioclose != NULL)
15349 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015350 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015351 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015352 ctxt = xmlNewParserCtxt();
15353 if (ctxt == NULL) {
15354 xmlFreeParserInputBuffer(input);
15355 return (NULL);
15356 }
15357 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15358 if (stream == NULL) {
15359 xmlFreeParserInputBuffer(input);
15360 xmlFreeParserCtxt(ctxt);
15361 return (NULL);
15362 }
15363 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015364 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015365}
15366
15367/**
15368 * xmlCtxtReadDoc:
15369 * @ctxt: an XML parser context
15370 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015371 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015372 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015373 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015374 *
15375 * parse an XML in-memory document and build a tree.
15376 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015377 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015378 * Returns the resulting document tree
15379 */
15380xmlDocPtr
15381xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015382 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015383{
15384 xmlParserInputPtr stream;
15385
15386 if (cur == NULL)
15387 return (NULL);
15388 if (ctxt == NULL)
15389 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015390 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015391
15392 xmlCtxtReset(ctxt);
15393
15394 stream = xmlNewStringInputStream(ctxt, cur);
15395 if (stream == NULL) {
15396 return (NULL);
15397 }
15398 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015399 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015400}
15401
15402/**
15403 * xmlCtxtReadFile:
15404 * @ctxt: an XML parser context
15405 * @filename: a file or URL
15406 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015407 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015408 *
15409 * parse an XML file from the filesystem or the network.
15410 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015411 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015412 * Returns the resulting document tree
15413 */
15414xmlDocPtr
15415xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15416 const char *encoding, int options)
15417{
15418 xmlParserInputPtr stream;
15419
15420 if (filename == NULL)
15421 return (NULL);
15422 if (ctxt == NULL)
15423 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015424 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015425
15426 xmlCtxtReset(ctxt);
15427
Daniel Veillard29614c72004-11-26 10:47:26 +000015428 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015429 if (stream == NULL) {
15430 return (NULL);
15431 }
15432 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015433 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015434}
15435
15436/**
15437 * xmlCtxtReadMemory:
15438 * @ctxt: an XML parser context
15439 * @buffer: a pointer to a char array
15440 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015441 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015442 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015443 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015444 *
15445 * parse an XML in-memory document and build a tree.
15446 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015447 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015448 * Returns the resulting document tree
15449 */
15450xmlDocPtr
15451xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015452 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015453{
15454 xmlParserInputBufferPtr input;
15455 xmlParserInputPtr stream;
15456
15457 if (ctxt == NULL)
15458 return (NULL);
15459 if (buffer == NULL)
15460 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015461 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015462
15463 xmlCtxtReset(ctxt);
15464
15465 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15466 if (input == NULL) {
15467 return(NULL);
15468 }
15469
15470 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15471 if (stream == NULL) {
15472 xmlFreeParserInputBuffer(input);
15473 return(NULL);
15474 }
15475
15476 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015477 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015478}
15479
15480/**
15481 * xmlCtxtReadFd:
15482 * @ctxt: an XML parser context
15483 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015484 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015485 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015486 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015487 *
15488 * parse an XML from a file descriptor and build a tree.
15489 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015490 * NOTE that the file descriptor will not be closed when the
15491 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015492 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015493 * Returns the resulting document tree
15494 */
15495xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015496xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15497 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015498{
15499 xmlParserInputBufferPtr input;
15500 xmlParserInputPtr stream;
15501
15502 if (fd < 0)
15503 return (NULL);
15504 if (ctxt == NULL)
15505 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015506 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015507
15508 xmlCtxtReset(ctxt);
15509
15510
15511 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15512 if (input == NULL)
15513 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015514 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015515 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15516 if (stream == NULL) {
15517 xmlFreeParserInputBuffer(input);
15518 return (NULL);
15519 }
15520 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015521 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015522}
15523
15524/**
15525 * xmlCtxtReadIO:
15526 * @ctxt: an XML parser context
15527 * @ioread: an I/O read function
15528 * @ioclose: an I/O close function
15529 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015530 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015531 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015532 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015533 *
15534 * parse an XML document from I/O functions and source and build a tree.
15535 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015536 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015537 * Returns the resulting document tree
15538 */
15539xmlDocPtr
15540xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15541 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015542 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015543 const char *encoding, int options)
15544{
15545 xmlParserInputBufferPtr input;
15546 xmlParserInputPtr stream;
15547
15548 if (ioread == NULL)
15549 return (NULL);
15550 if (ctxt == NULL)
15551 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015552 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015553
15554 xmlCtxtReset(ctxt);
15555
15556 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15557 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015558 if (input == NULL) {
15559 if (ioclose != NULL)
15560 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015561 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015562 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015563 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15564 if (stream == NULL) {
15565 xmlFreeParserInputBuffer(input);
15566 return (NULL);
15567 }
15568 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015569 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015570}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015571
15572#define bottom_parser
15573#include "elfgcchack.h"